class NewsFeed:
    """Load video listings from feed pages, backed by per-feed text-file caches.

    Every public ``get...Feed`` method follows the same pattern: return the
    cached list when the cache file is still fresh, otherwise download the
    page, parse it with ``Sections``, de-duplicate by description, sort by
    feed time, rewrite the cache and return the list.  A non-200 response on
    the feed page yields ``None``.

    Attributes:
        pathDb: directory handed to ``PathHelper.makePathFileName`` to locate
            the cache files.
        logger: optional object exposing ``write(message)``; when ``None``
            log output goes to ``print``.
    """

    def __init__(self, pathDb, logger=None):
        self.pathDb = pathDb
        self.logger = logger

    @staticmethod
    def isResourceAvailable(url):
        """Return True when a HEAD request to *url* succeeds within 2.5 seconds.

        Any request failure (timeout, connection error, malformed URL, ...)
        is reported as ``False`` rather than raised.
        """
        try:
            response = requests.head(url, timeout=2.5)
        except Exception:
            return False
        return bool(response.ok)

    def getItemsInAmericasNewsRoomFeed(self, url):
        """Return the videos of the feed at *url*, sorted oldest first.

        Uses the cache file named by ``VIDEODB_AMERICAS_NEWSROOM_FILENAME``.
        Returns ``None`` when the page does not answer with HTTP 200.
        """
        return self._loadArticleFeed(
            url, VIDEODB_AMERICAS_NEWSROOM_FILENAME, newestFirst=False)

    def getItemsInOutnumberedFeed(self, url):
        """Return the videos of the feed at *url*, sorted newest first.

        Uses the cache file named by ``VIDEODB_OUTNUMBERED_FILENAME``.
        Returns ``None`` when the page does not answer with HTTP 200.
        """
        return self._loadArticleFeed(
            url, VIDEODB_OUTNUMBERED_FILENAME, newestFirst=True)

    def getItemsInFeed(self, url):
        """Return the videos of the feed at *url*, sorted newest first.

        Uses the cache file named by ``VIDEODB_FILENAME``.  Videos older than
        ``FEED_REJECT_IF_OLDER_THAN_DAYS`` are dropped before caching, and the
        source of the data (cache or URL) is logged.  Returns ``None`` when
        the page does not answer with HTTP 200.
        """
        return self._loadArticleFeed(
            url,
            VIDEODB_FILENAME,
            newestFirst=True,
            maxAgeDays=FEED_REJECT_IF_OLDER_THAN_DAYS,
            logSource=True)

    def filterFeedMaxDays(self, videos, days):
        """Return the videos whose feed time is at most *days* whole days old.

        Each decision is logged.  The kept videos come back in the reverse
        of their input order.
        """
        now = datetime.now()
        kept = []
        for video in videos:
            delta = now - video.getFeedTime()
            if delta.days <= days:
                message = f"INCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
                self.writeLog(message)
                kept.append(video)
            else:
                message = f"EXCL. days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
                self.writeLog(message)
        # Callers receive the survivors in reversed input order; reversing once
        # at the end gives that order without repeated front insertions.
        kept.reverse()
        return kept

    def getUSItemsInFeed(self, url):
        """Return the videos referenced by the listing page at *url*, newest first.

        Each video id found on the listing is resolved through its own page
        under ``https://video.foxnews.com/v/``.  Uses the cache file named by
        ``VIDEODB_US_FILENAME``.  Returns ``None`` when the listing page does
        not answer with HTTP 200.
        """
        return self._loadVideoPageFeed(url, VIDEODB_US_FILENAME)

    def getExclusiveItemsInFeed(self, url):
        """Return the videos referenced by the listing page at *url*, newest first.

        Works like ``getUSItemsInFeed`` but uses the cache file named by
        ``VIDEODB_EXCLUSIVE_FILENAME``.
        """
        return self._loadVideoPageFeed(url, VIDEODB_EXCLUSIVE_FILENAME)

    def getItemsInArchiveFeed(self, url, archiveDbFileName):
        """Return the videos stored in *archiveDbFileName*, or ``None`` on failure.

        *url* is accepted for signature parity with the other feed methods
        and is not used; archive feeds are read from disk only.
        """
        cachePathFileName = PathHelper.makePathFileName(archiveDbFileName, self.pathDb)
        return self.readFeedCache(cachePathFileName)

    def readFeedCache(self, pathFileName):
        """Parse one ``Video`` per line from *pathFileName*.

        Returns the list of videos, or ``None`` (after logging the traceback)
        when the file cannot be read or a line cannot be parsed.
        """
        try:
            with open(pathFileName, "r", encoding='utf-8') as inputStream:
                return [Video.fromString(line) for line in inputStream]
        except Exception:
            self.writeLog(traceback.format_exc())
            return None

    def writeFeedCache(self, pathFileName, videos):
        """Write *videos* to *pathFileName*, one ``toString()`` line per video.

        Best effort: a failure is logged and otherwise ignored.  *videos* is
        returned in either case.
        """
        try:
            with open(pathFileName, "w", encoding='utf-8') as outputStream:
                outputStream.writelines(video.toString() + "\n" for video in videos)
        except Exception:
            self.writeLog(traceback.format_exc())
        return videos

    def isFeedCacheAvailable(self, pathFileName, expireMinutes):
        """Return True when *pathFileName* exists and is still fresh.

        A file counts as expired when it is more than *expireMinutes* minutes
        old, or at least two hours old regardless of *expireMinutes*.  Expired
        files are archived via ``archiveFile`` before ``False`` is returned.
        Any error is logged and reported as ``False``.
        """
        try:
            self.writeLog('Inspecting cache file {pathFileName}'.format(pathFileName=pathFileName))
            if not os.path.isfile(pathFileName):
                return False
            # Whole-second resolution is enough for a minutes-based expiry.
            fileDateTime = datetime.fromtimestamp(int(os.path.getmtime(pathFileName)))
            age = datetime.now() - fileDateTime
            # total_seconds() includes whole days; timedelta.seconds alone would
            # make a day-old file look only minutes old.  A modification time
            # slightly in the future is treated as age zero.
            ageSeconds = max(0, int(age.total_seconds()))
            hours = ageSeconds // 3600
            minutes = ageSeconds // 60
            self.writeLog('file is = "{age}" hours old'.format(age=hours))
            self.writeLog('file is = "{age}" minutes old'.format(age=minutes))
            if hours > 1 or minutes > expireMinutes:
                self.archiveFile(pathFileName)
                return False
            return True
        except Exception:
            self.writeLog(traceback.format_exc())
            return False

    def archiveFile(self, pathFileName):
        """Move *pathFileName* aside to ``<name>.txt.<n>`` and return True.

        ``<n>`` starts at one more than the number of existing archives and is
        advanced past any name already in use, so an existing archive is never
        overwritten.  Returns ``False`` when *pathFileName* is not a file.
        """
        if not os.path.isfile(pathFileName):
            return False
        archiveFile = StringHelper.betweenString(pathFileName, None, '.txt')
        existingArchives = glob.glob(archiveFile + '.txt.*')
        index = len(existingArchives) + 1
        archiveFileName = archiveFile + '.txt.' + str(index)
        # Gaps in the numbering (a deleted archive) would otherwise make the
        # count-based index collide with a surviving archive.
        while os.path.exists(archiveFileName):
            index += 1
            archiveFileName = archiveFile + '.txt.' + str(index)
        self.writeLog('archiveFile: Copying "{pathFileName}" to "{archiveFileName}".'.format(pathFileName=pathFileName, archiveFileName=archiveFileName))
        shutil.copy(pathFileName, archiveFileName)
        os.remove(pathFileName)
        return True

    def writeLog(self, message):
        """Send *message* to the configured logger, or to stdout without one."""
        if self.logger is not None:
            self.logger.write(message)
        else:
            print(message)

    # ------------------------------------------------------------------
    # Private helpers shared by the public feed loaders.
    # ------------------------------------------------------------------

    def _loadArticleFeed(self, url, cacheFileName, newestFirst, maxAgeDays=None, logSource=False):
        """Load a feed whose listing page carries the video data itself.

        Args:
            url: address of the listing page.
            cacheFileName: cache file name, resolved against ``self.pathDb``.
            newestFirst: sort direction of the returned list.
            maxAgeDays: when given, drop videos older than this many days.
            logSource: log whether data came from the cache or from *url*.

        Returns:
            The list of videos, or ``None`` on a non-200 response.
        """
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(cacheFileName, self.pathDb)
        cached = self._readFreshCache(cachePathFileName, logSource)
        if cached is not None:
            return cached
        if logSource:
            self.writeLog(f"Loading videos from {url}")
        pageText = self._fetchText(url)
        if pageText is None:
            return None
        sections = Sections()
        videos = {}
        searchIndex = 0
        while searchIndex != -1:
            video, searchIndex = sections.getItemsInSection(pageText, "article", searchIndex)
            self._addVideo(videos, video, now)
        videoList = list(videos.values())
        if maxAgeDays is not None:
            videoList = self.filterFeedMaxDays(videoList, maxAgeDays)
        return self._sortAndCache(cachePathFileName, videoList, newestFirst)

    def _loadVideoPageFeed(self, url, cacheFileName):
        """Load a feed whose listing only carries ids; each video has its own page.

        Video pages that do not answer with HTTP 200 are skipped.  Returns the
        list of videos newest first, or ``None`` when the listing page itself
        does not answer with HTTP 200.
        """
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(cacheFileName, self.pathDb)
        cached = self._readFreshCache(cachePathFileName)
        if cached is not None:
            return cached
        pageText = self._fetchText(url)
        if pageText is None:
            return None
        sections = Sections()
        videos = {}
        searchIndex = 0
        while searchIndex != -1:
            videoId, searchIndex = sections.getVideoIdInSection(pageText, "article", searchIndex)
            if videoId is None:
                continue
            videoPageText = self._fetchText('https://video.foxnews.com/v/' + videoId, logResponse=False)
            if videoPageText is None:
                continue
            self._addVideo(videos, sections.getVideoContentInSection(videoPageText), now)
        return self._sortAndCache(cachePathFileName, list(videos.values()), True)

    def _readFreshCache(self, cachePathFileName, logSource=False):
        """Return the cached videos when the cache is fresh and readable, else None."""
        if not self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            return None
        if logSource:
            self.writeLog(f"Loading videos from cache {cachePathFileName}")
        return self.readFeedCache(cachePathFileName)

    def _fetchText(self, url, logResponse=True):
        """Return the body text of *url*, or ``None`` unless the status is 200.

        The text is captured before the response is closed.  When
        *logResponse* is set and ``LOG_HTTP_RESPONSES`` is enabled, the URL
        and the body are written to the log.
        """
        response = HttpNetRequest().getHttpNetRequest(url)
        try:
            if response.status_code != 200:
                return None
            text = response.text
        finally:
            response.close()
        if logResponse and LOG_HTTP_RESPONSES:
            self.writeLog(url)
            self.writeLog(text)
        return text

    @staticmethod
    def _addVideo(videos, video, now):
        """Register *video* under its description unless it is None or a duplicate.

        Newly registered videos get their feed time set from *now* and their
        relative ``feedTimeOffset``.
        """
        if video is None or video.description in videos:
            return
        videos[video.description] = video
        video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))

    def _sortAndCache(self, cachePathFileName, videoList, newestFirst):
        """Sort *videoList* by feed time, persist it to the cache and return it."""
        videoList = sorted(videoList, key=lambda video: video.getFeedTime(), reverse=newestFirst)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList
previewUrl=strContainingString[indexPreview:] previewUrl=self.betweenString(previewUrl,'"','"') if "tokenvod" in previewUrl: return video, searchIndex indexDescription=strContainingString.index("alt=\"") description=strContainingString[indexDescription:] description=self.betweenString(description,'"','"') description=self.removeHtml(description) description=description.replace("- Fox News","") if "vod.foxbusiness" in description: return video, searchIndex indexDuration=strContainingString.index("