diff --git a/newsfeed.py b/newsfeed.py index c41bffd..7674e53 100755 --- a/newsfeed.py +++ b/newsfeed.py @@ -30,93 +30,105 @@ class NewsFeed: return False def getItemsInAmericasNewsRoomFeed(self,url): - now=datetime.now() - cachePathFileName=PathHelper.makePathFileName(VIDEODB_AMERICAS_NEWSROOM_FILENAME,self.pathDb) - if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS): - videos=self.readFeedCache(cachePathFileName) - if videos is not None: - return(videos) - sections=Sections() - videos = {} - httpNetRequest=HttpNetRequest() - response=httpNetRequest=httpNetRequest.getHttpNetRequest(url) - status=response.status_code - searchIndex=0 - response.close() - if status!=200: - return None - if LOG_HTTP_RESPONSES: - self.writeLog(url) - self.writeLog(response.text) - while -1!= searchIndex: - video, searchIndex = sections.getItemsInSection(response.text,"article",searchIndex) - if video is not None and not (video.description in videos): - videos[video.description]=video - video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset)) - videoList=list(videos.values()) - videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=False) - self.writeFeedCache(cachePathFileName,videoList) - return (videoList) + response = None + try: + now=datetime.now() + cachePathFileName=PathHelper.makePathFileName(VIDEODB_AMERICAS_NEWSROOM_FILENAME,self.pathDb) + if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS): + videos=self.readFeedCache(cachePathFileName) + if videos is not None: + return(videos) + sections=Sections() + videos = {} + httpNetRequest=HttpNetRequest() + response=httpNetRequest.getHttpNetRequest(url) + status=response.status_code + searchIndex=0 + if status!=200: + return None + if LOG_HTTP_RESPONSES: + self.writeLog(url) + self.writeLog(response.text) + while -1!= searchIndex: + video, searchIndex = sections.getItemsInSection(response.text,"article",searchIndex) + if video is not None and not (video.description in videos): + videos[video.description]=video + video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset)) + videoList=list(videos.values()) + videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=False) + self.writeFeedCache(cachePathFileName,videoList) + return (videoList) + finally: + if None!= response: + response.close() def getItemsInOutnumberedFeed(self,url): - now=datetime.now() - cachePathFileName=PathHelper.makePathFileName(VIDEODB_OUTNUMBERED_FILENAME,self.pathDb) - if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS): - videos=self.readFeedCache(cachePathFileName) - if videos is not None: - return(videos) - sections=Sections() - videos = {} - httpNetRequest=HttpNetRequest() - response=httpNetRequest=httpNetRequest.getHttpNetRequest(url) - status=response.status_code - searchIndex=0 - response.close() - if status!=200: - return None - if LOG_HTTP_RESPONSES: - self.writeLog(url) - self.writeLog(response.text) - while -1!= searchIndex: - video, searchIndex = sections.getItemsInSection(response.text,"article",searchIndex) - if video is not None and not (video.description in videos): - videos[video.description]=video - video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset)) - videoList=list(videos.values()) - videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True) - self.writeFeedCache(cachePathFileName,videoList) - return (videoList) + response = None + try: + now=datetime.now() + cachePathFileName=PathHelper.makePathFileName(VIDEODB_OUTNUMBERED_FILENAME,self.pathDb) + if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS): + videos=self.readFeedCache(cachePathFileName) + if videos is not None: + return(videos) + sections=Sections() + videos = {} + httpNetRequest=HttpNetRequest() + response=httpNetRequest.getHttpNetRequest(url) + status=response.status_code + searchIndex=0 + if status!=200: + return None + if LOG_HTTP_RESPONSES: + self.writeLog(url) + self.writeLog(response.text) + while -1!= searchIndex: + video, searchIndex = sections.getItemsInSection(response.text,"article",searchIndex) + if video is not None and not (video.description in videos): + videos[video.description]=video + video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset)) + videoList=list(videos.values()) + videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True) + self.writeFeedCache(cachePathFileName,videoList) + return (videoList) + finally: + if None!=response: + response.close() def getItemsInFeed(self,url): - now=datetime.now() - cachePathFileName=PathHelper.makePathFileName(VIDEODB_FILENAME,self.pathDb) - if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS): - self.writeLog(f"Loading videos from cache {cachePathFileName}") - videos=self.readFeedCache(cachePathFileName) - if videos is not None: - return(videos) - sections=Sections() - videos = {} - httpNetRequest=HttpNetRequest() - self.writeLog(f"Loading videos from {url}") - response=httpNetRequest=httpNetRequest.getHttpNetRequest(url) - status=response.status_code - searchIndex=0 - response.close() - if status!=200: - return None - if LOG_HTTP_RESPONSES: - self.writeLog(url) - self.writeLog(response.text) - while -1!= searchIndex: - video, searchIndex= sections.getItemsInSection(response.text,"article",searchIndex) - if video is not None and not (video.description in videos): - videos[video.description]=video - video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset)) - videoList=self.filterFeedMaxDays(list(videos.values()),FEED_REJECT_IF_OLDER_THAN_DAYS) - videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True) - self.writeFeedCache(cachePathFileName,videoList) - return (videoList) + response = None + try: + now=datetime.now() + cachePathFileName=PathHelper.makePathFileName(VIDEODB_FILENAME,self.pathDb) + if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS): + self.writeLog(f"Loading videos from cache {cachePathFileName}") + videos=self.readFeedCache(cachePathFileName) + if videos is not None: + return(videos) + sections=Sections() + videos = {} + httpNetRequest=HttpNetRequest() + self.writeLog(f"Loading videos from {url}") + response=httpNetRequest.getHttpNetRequest(url) + status=response.status_code + searchIndex=0 + if status!=200: + return None + if LOG_HTTP_RESPONSES: + self.writeLog(url) + self.writeLog(response.text) + while -1!= searchIndex: + video, searchIndex= sections.getItemsInSection(response.text,"article",searchIndex) + if video is not None and not (video.description in videos): + videos[video.description]=video + video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset)) + videoList=self.filterFeedMaxDays(list(videos.values()),FEED_REJECT_IF_OLDER_THAN_DAYS) + videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True) + self.writeFeedCache(cachePathFileName,videoList) + return (videoList) + finally: + if None!=response: + response.close() def filterFeedMaxDays(self, videos, days): now = datetime.now() @@ -126,89 +138,97 @@ class NewsFeed: if delta.days <= days: message = f"INCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}" self.writeLog(message) - filteredList.insert(0,video) + filteredList.append(video) else: message = f"EXCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}" self.writeLog(message) return filteredList def getUSItemsInFeed(self,url): - now=datetime.now() - cachePathFileName=PathHelper.makePathFileName(VIDEODB_US_FILENAME,self.pathDb) - if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS): - videos=self.readFeedCache(cachePathFileName) - if videos is not None: - return(videos) - sections=Sections() - videos = {} - httpNetRequest=HttpNetRequest() - response=httpNetRequest.getHttpNetRequest(url) - status=response.status_code - searchIndex=0 - response.close() - if status!=200: - return None - if LOG_HTTP_RESPONSES: - self.writeLog(url) - self.writeLog(response.text) - while -1!= searchIndex: - videoId, searchIndex = sections.getVideoIdInSection(response.text,"article",searchIndex) - if videoId is None: - continue - url='https://video.foxnews.com/v/'+videoId + response = None + try: + now=datetime.now() + cachePathFileName=PathHelper.makePathFileName(VIDEODB_US_FILENAME,self.pathDb) + if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS): + videos=self.readFeedCache(cachePathFileName) + if videos is not None: + return(videos) + sections=Sections() + videos = {} httpNetRequest=HttpNetRequest() - innerResponse=httpNetRequest.getHttpNetRequest(url) - status=innerResponse.status_code - innerResponse.close() + response=httpNetRequest.getHttpNetRequest(url) + status=response.status_code + searchIndex=0 if status!=200: - continue - video=sections.getVideoContentInSection(innerResponse.text) - if video is not None and not (video.description in videos): - videos[video.description]=video - video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset)) - videoList=list(videos.values()) - videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True) - self.writeFeedCache(cachePathFileName,videoList) - return (videoList) + return None + if LOG_HTTP_RESPONSES: + self.writeLog(url) + self.writeLog(response.text) + while -1!= searchIndex: + videoId, searchIndex = sections.getVideoIdInSection(response.text,"article",searchIndex) + if videoId is None: + continue + videoUrl='https://video.foxnews.com/v/'+videoId + httpNetRequest=HttpNetRequest() + innerResponse=httpNetRequest.getHttpNetRequest(videoUrl) + status=innerResponse.status_code + innerResponse.close() + if status!=200: + continue + video=sections.getVideoContentInSection(innerResponse.text) + if video is not None and not (video.description in videos): + videos[video.description]=video + video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset)) + videoList=list(videos.values()) + videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True) + self.writeFeedCache(cachePathFileName,videoList) + return (videoList) + finally: + if None!=response: + response.close() def getExclusiveItemsInFeed(self,url): - now=datetime.now() - cachePathFileName=PathHelper.makePathFileName(VIDEODB_EXCLUSIVE_FILENAME,self.pathDb) - if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS): - videos=self.readFeedCache(cachePathFileName) - if videos is not None: - return(videos) - sections=Sections() - videos = {} - httpNetRequest=HttpNetRequest() - response=httpNetRequest.getHttpNetRequest(url) - status=response.status_code - searchIndex=0 - response.close() - if status!=200: - return None - if LOG_HTTP_RESPONSES: - self.writeLog(url) - self.writeLog(response.Text) - while -1!= searchIndex: - videoId, searchIndex = sections.getVideoIdInSection(response.text,"article",searchIndex) - if videoId is None: - continue - url='https://video.foxnews.com/v/'+videoId + response = None + try: + now=datetime.now() + cachePathFileName=PathHelper.makePathFileName(VIDEODB_EXCLUSIVE_FILENAME,self.pathDb) + if self.isFeedCacheAvailable(cachePathFileName,CACHE_EXPIRY_MINS): + videos=self.readFeedCache(cachePathFileName) + if videos is not None: + return(videos) + sections=Sections() + videos = {} httpNetRequest=HttpNetRequest() - innerResponse=httpNetRequest.getHttpNetRequest(url) - status=innerResponse.status_code - innerResponse.close() + response=httpNetRequest.getHttpNetRequest(url) + status=response.status_code + searchIndex=0 if status!=200: - continue - video=sections.getVideoContentInSection(innerResponse.text) - if video is not None and not (video.description in videos): - videos[video.description]=video - video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset)) - videoList=list(videos.values()) - videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True) - self.writeFeedCache(cachePathFileName,videoList) - return (videoList) + return None + if LOG_HTTP_RESPONSES: + self.writeLog(url) + self.writeLog(response.text) + while -1!= searchIndex: + videoId, searchIndex = sections.getVideoIdInSection(response.text,"article",searchIndex) + if videoId is None: + continue + videoUrl='https://video.foxnews.com/v/'+videoId + httpNetRequest=HttpNetRequest() + innerResponse=httpNetRequest.getHttpNetRequest(videoUrl) + status=innerResponse.status_code + innerResponse.close() + if status!=200: + continue + video=sections.getVideoContentInSection(innerResponse.text) + if video is not None and not (video.description in videos): + videos[video.description]=video + video.setFeedTime(DateTimeHelper.applyRelativeTime(now,video.feedTimeOffset)) + videoList=list(videos.values()) + videoList=sorted(videoList, key=lambda x:x.getFeedTime(),reverse=True) + self.writeFeedCache(cachePathFileName,videoList) + return (videoList) + finally: + if None!=response: + response.close() def getItemsInArchiveFeed(self,url,archiveDbFileName): cachePathFileName=PathHelper.makePathFileName(archiveDbFileName,self.pathDb) @@ -220,11 +240,11 @@ class NewsFeed: def readFeedCache(self,pathFileName): try: videos=[] +# 'with' will automatically close the stream with open(pathFileName,"r",encoding='utf-8') as inputStream: for line in inputStream: video=Video.fromString(line) videos.append(video) - inputStream.close() return(videos) except: self.writeLog(traceback.format_exc()) @@ -235,35 +255,36 @@ class NewsFeed: with open(pathFileName,"w",encoding='utf-8') as outputStream: for video in videos: outputStream.write(video.toString()+"\n") - outputStream.close() +# 'with' will automatically close the stream return(videos) except: self.writeLog(traceback.format_exc()) return(videos) - def isFeedCacheAvailable(self,pathFileName,expireMinutes): + def isFeedCacheAvailable(self, pathFileName, expireMinutes): try: - self.writeLog('Inspecting cache file {pathFileName}'.format(pathFileName=pathFileName)) - if not os.path.isfile(pathFileName): - return(False) - modifiedTime=os.path.getmtime(pathFileName) - convertTime=time.localtime(modifiedTime) - formatTime=time.strftime('%d%m%Y %H:%M:%S',convertTime) - fileDateTime=time.strptime(formatTime,'%d%m%Y %H:%M:%S') - currentTime=datetime.now() - timedelta=currentTime-datetime(*(fileDateTime[0:6])) - hours, hremainder = divmod(timedelta.seconds,3600) - minutes, mremainder = divmod(timedelta.seconds,60) - self.writeLog('file is = "{age}" hours old'.format(age=hours)) - self.writeLog('file is = "{age}" minutes old'.format(age=minutes)) - if hours > 1 or minutes > expireMinutes: - self.archiveFile(pathFileName) - return(False) - return (True) + self.writeLog('Inspecting cache file {pathFileName}'.format(pathFileName=pathFileName)) + if not os.path.isfile(pathFileName): + return False + modifiedTime = os.path.getmtime(pathFileName) + convertTime = time.localtime(modifiedTime) + formatTime = time.strftime('%d%m%Y %H:%M:%S', convertTime) + fileDateTime = time.strptime(formatTime, '%d%m%Y %H:%M:%S') + currentTime = datetime.now() + elapsed = currentTime - datetime(*(fileDateTime[0:6])) + totalSeconds = int(elapsed.total_seconds()) + hours, remainder = divmod(totalSeconds, 3600) + minutes, _ = divmod(remainder, 60) + self.writeLog('file is = "{age}" hours old'.format(age=hours)) + self.writeLog('file is = "{age}" minutes old'.format(age=minutes)) + if hours > 1 or minutes > expireMinutes: + self.archiveFile(pathFileName) + return False + return True except: - self.writeLog(traceback.format_exc()); - return(False) - + self.writeLog(traceback.format_exc()) + return False + def archiveFile(self, pathFileName): if not os.path.isfile(pathFileName): return(False) @@ -316,25 +337,35 @@ class Sections: if "tokenvod" in previewUrl: return video, searchIndex - indexDescription=strContainingString.index("alt=\"") +# Handle video description + indexDescription=strContainingString.find("alt=\"") + if -1 == indexDescription: + return video, searchIndex description=strContainingString[indexDescription:] description=self.betweenString(description,'"','"') description=self.removeHtml(description) description=description.replace("- Fox News","") if "vod.foxbusiness" in description: return video, searchIndex - indexDuration=strContainingString.index("
") + +# Handle video duration + indexDuration=strContainingString.find("
") if -1 != indexDuration: strDuration=strContainingString[indexDuration:] strDuration=self.betweenString(strDuration,">","<") description=description+" - "+strDuration - indexPublication=strContainingString.index("
") + +# Handle video publication + strPublication = "" + indexPublication=strContainingString.find("
") if -1 != indexPublication: strPublication=strContainingString[indexPublication:] strPublication=self.betweenString(strPublication,"") description=description+" ("+strPublication+")" + +# Handle the icon icon=None - indexIcon=strContainingString.index("srcset=") + indexIcon=strContainingString.find("srcset=") if -1 != indexIcon: icon=strContainingString[indexIcon:] icon=self.betweenString(icon,"\"","\"")