import json
import os
import webbrowser
import requests
import traceback
import time
import re
import glob
import shutil
from datetime import timedelta
from datetime import datetime
from datetime import timezone
from environment import *
from utility import *
from video import *


class NewsFeed:
    """Scrape video listings from feed pages and cache them as text files.

    Every feed is cached in its own file below ``pathDb``. A cache file that
    is still fresh is returned as-is; a stale one is archived and the feed is
    downloaded and parsed again.

    NOTE(review): the object returned by ``HttpNetRequest.getHttpNetRequest``
    is only ever used through ``status_code``, ``text`` and ``close()``; it is
    presumably a ``requests.Response`` -- confirm against ``utility``.
    """

    def __init__(self, pathDb, logger=None):
        """Remember the cache directory and the optional logger.

        pathDb -- directory handed to PathHelper.makePathFileName for cache files.
        logger -- object with a ``write(message)`` method; when None, messages
                  are printed instead (see writeLog).
        """
        self.pathDb = pathDb
        self.logger = logger

    @staticmethod
    def isResourceAvailable(url):
        """Return True when a HEAD request to url succeeds within 2.5 seconds."""
        try:
            return bool(requests.head(url, timeout=2.5).ok)
        except Exception:
            # Any request failure (DNS, timeout, malformed URL, ...) simply
            # means "not available". Exception (rather than a bare except)
            # lets KeyboardInterrupt/SystemExit propagate.
            return False

    # ------------------------------------------------------------------
    # Public feed entry points
    # ------------------------------------------------------------------

    def getItemsInAmericasNewsRoomFeed(self, url):
        """Return the America's Newsroom videos, oldest first.

        Returns the cached list when the cache is fresh, otherwise the freshly
        scraped list, or None when the page does not answer with status 200.
        """
        return self._loadFeed(url, VIDEODB_AMERICAS_NEWSROOM_FILENAME,
                              self._collectArticleVideos, newestFirst=False)

    def getItemsInOutnumberedFeed(self, url):
        """Return the Outnumbered videos, newest first (None on a non-200 page)."""
        return self._loadFeed(url, VIDEODB_OUTNUMBERED_FILENAME,
                              self._collectArticleVideos)

    def getItemsInFeed(self, url):
        """Return the main feed videos, newest first (None on a non-200 page).

        Unlike the other feeds this one logs where the videos come from and
        drops entries older than FEED_REJECT_IF_OLDER_THAN_DAYS.
        """
        return self._loadFeed(url, VIDEODB_FILENAME,
                              self._collectArticleVideos,
                              maxDays=FEED_REJECT_IF_OLDER_THAN_DAYS,
                              verbose=True)

    def filterFeedMaxDays(self, videos, days):
        """Return the videos whose feed time is at most ``days`` whole days old.

        Every decision (INCL./EXCL.) is written to the log.
        """
        now = datetime.now()
        filteredList = []
        for video in videos:
            feedTime = video.getFeedTime()
            ageInDays = (now - feedTime).days
            keep = ageInDays <= days
            tag = "INCL." if keep else "EXCL."
            self.writeLog(f"{tag} days={ageInDays},feed time={feedTime} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}")
            if keep:
                filteredList.append(video)
        return filteredList

    def getUSItemsInFeed(self, url):
        """Return the U.S. videos, newest first (None on a non-200 page).

        The listing page only yields video ids; each video page is fetched
        separately to obtain its content.
        """
        return self._loadFeed(url, VIDEODB_US_FILENAME,
                              self._collectLinkedVideos)

    def getExclusiveItemsInFeed(self, url):
        """Return the exclusive videos, newest first (None on a non-200 page).

        Works like getUSItemsInFeed but uses its own cache file.
        """
        return self._loadFeed(url, VIDEODB_EXCLUSIVE_FILENAME,
                              self._collectLinkedVideos)

    def getItemsInArchiveFeed(self, url, archiveDbFileName):
        """Return the videos stored in an archive file, or None if unreadable.

        ``url`` is accepted for signature compatibility but is not used;
        archives are only ever read from disk.
        """
        cachePathFileName = PathHelper.makePathFileName(archiveDbFileName, self.pathDb)
        return self.readFeedCache(cachePathFileName)

    # ------------------------------------------------------------------
    # Shared feed machinery (private)
    # ------------------------------------------------------------------

    def _loadFeed(self, url, dbFileName, collect, newestFirst=True,
                  maxDays=None, verbose=False):
        """Serve a feed from its cache, or download, parse, sort and cache it.

        collect     -- callable(pageText, now) returning the list of videos.
        newestFirst -- sort direction on the videos' feed time.
        maxDays     -- when given, videos older than this are filtered out.
        verbose     -- when True, log whether the cache or the url is used.
        """
        # Captured before any I/O, as the reference for the relative
        # publication offsets passed to DateTimeHelper.applyRelativeTime.
        now = datetime.now()
        cachePathFileName = PathHelper.makePathFileName(dbFileName, self.pathDb)

        if self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            if verbose:
                self.writeLog(f"Loading videos from cache {cachePathFileName}")
            cached = self.readFeedCache(cachePathFileName)
            if cached is not None:
                return cached
            # An unreadable cache falls through to a fresh download.

        if verbose:
            self.writeLog(f"Loading videos from {url}")
        pageText = self._downloadText(url)
        if pageText is None:
            return None

        videoList = collect(pageText, now)
        if maxDays is not None:
            videoList = self.filterFeedMaxDays(videoList, maxDays)
        videoList = sorted(videoList, key=lambda video: video.getFeedTime(),
                           reverse=newestFirst)
        self.writeFeedCache(cachePathFileName, videoList)
        return videoList

    def _downloadText(self, url, logResponse=True):
        """Fetch url and return its body text, or None unless the status is 200.

        The text is read BEFORE the response is closed, and the response is
        closed on every path, including when an exception propagates.
        """
        response = None
        try:
            response = HttpNetRequest().getHttpNetRequest(url)
            if response.status_code != 200:
                return None
            # Read once: callers scan this text repeatedly while parsing.
            text = response.text
            if logResponse and LOG_HTTP_RESPONSES:
                self.writeLog(url)
                self.writeLog(text)
            return text
        finally:
            if response is not None:
                response.close()

    @staticmethod
    def _registerVideo(videos, video, now):
        """Add video to the dict keyed by description unless missing or duplicate."""
        if video is None or video.description in videos:
            return
        video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))
        videos[video.description] = video

    def _collectArticleVideos(self, pageText, now):
        """Parse every "article" section of a listing page into videos."""
        sections = Sections()
        videos = {}
        searchIndex = 0
        while searchIndex != -1:
            video, searchIndex = sections.getItemsInSection(pageText, "article", searchIndex)
            self._registerVideo(videos, video, now)
        return list(videos.values())

    def _collectLinkedVideos(self, pageText, now):
        """Resolve each "article" section to a video id and fetch that video's page."""
        sections = Sections()
        videos = {}
        searchIndex = 0
        while searchIndex != -1:
            videoId, searchIndex = sections.getVideoIdInSection(pageText, "article", searchIndex)
            if videoId is None:
                continue
            videoText = self._downloadText('https://video.foxnews.com/v/' + videoId,
                                           logResponse=False)
            if videoText is None:
                # Video page did not answer with 200: skip this entry.
                continue
            self._registerVideo(videos, sections.getVideoContentInSection(videoText), now)
        return list(videos.values())

    # ------------------------------------------------------------------
    # Cache files
    # ------------------------------------------------------------------

    def readFeedCache(self, pathFileName):
        """Return the videos stored one per line in pathFileName, or None on error.

        Errors (missing file, unparsable line, ...) are logged, not raised.
        """
        try:
            # 'with' will automatically close the stream
            with open(pathFileName, "r", encoding='utf-8') as inputStream:
                return [Video.fromString(line) for line in inputStream]
        except Exception:
            self.writeLog(traceback.format_exc())
            return None

    def writeFeedCache(self, pathFileName, videos):
        """Write one video per line to pathFileName and return ``videos``.

        Best effort: a write failure is logged and ``videos`` is still
        returned, so the caller can serve the freshly scraped list.
        """
        try:
            # 'with' will automatically close the stream
            with open(pathFileName, "w", encoding='utf-8') as outputStream:
                for video in videos:
                    outputStream.write(video.toString() + "\n")
        except Exception:
            self.writeLog(traceback.format_exc())
        return videos

    def isFeedCacheAvailable(self, pathFileName, expireMinutes):
        """Return True when the cache file exists and is not older than expireMinutes.

        A stale file is archived (see archiveFile) before False is returned.
        Any error while inspecting the file is logged and reported as False.
        """
        try:
            self.writeLog('Inspecting cache file {pathFileName}'.format(pathFileName=pathFileName))
            if not os.path.isfile(pathFileName):
                return False

            modifiedTime = datetime.fromtimestamp(os.path.getmtime(pathFileName))
            totalSeconds = int((datetime.now() - modifiedTime).total_seconds())
            hours, remainder = divmod(totalSeconds, 3600)
            minutes = remainder // 60
            self.writeLog('file is = "{age}" hours old'.format(age=hours))
            self.writeLog('file is = "{age}" minutes old'.format(age=minutes))

            # Compare the TOTAL age in minutes. Testing the hour and the
            # minute-within-the-hour separately would treat a file that is
            # e.g. 1h05m old as only 5 minutes old.
            if totalSeconds // 60 > expireMinutes:
                self.archiveFile(pathFileName)
                return False
            return True
        except Exception:
            self.writeLog(traceback.format_exc())
            return False

    def archiveFile(self, pathFileName):
        """Move pathFileName aside as ``<name>.txt.<n>`` and return True.

        Returns False when pathFileName does not exist. ``n`` starts at the
        number of existing archives plus one and is bumped until the name is
        unused, so an existing archive is never overwritten.
        """
        if not os.path.isfile(pathFileName):
            return False

        archiveFile = StringHelper.betweenString(pathFileName, None, '.txt')
        # glob.escape keeps characters such as '[' in the directory name from
        # being interpreted as wildcard syntax.
        existing = glob.glob(glob.escape(archiveFile) + '.txt.*')
        index = len(existing) + 1
        archiveFileName = archiveFile + '.txt.' + str(index)
        while os.path.exists(archiveFileName):
            # Gaps in the numbering make len()+1 collide with a live archive.
            index += 1
            archiveFileName = archiveFile + '.txt.' + str(index)

        self.writeLog('archiveFile: Copying "{pathFileName}" to "{archiveFileName}".'.format(pathFileName=pathFileName, archiveFileName=archiveFileName))
        shutil.copy(pathFileName, archiveFileName)
        os.remove(pathFileName)
        return True

    def writeLog(self, message):
        """Send message to the configured logger, or print it when there is none."""
        if self.logger is not None:
            self.logger.write(message)
        else:
            print(message)


class Sections:
    def __init__(self):
        self.dummy=None

    # NOTE(review): from this point the text of Sections.getItemsInSection is
    # damaged in this copy of the file -- string literals and whole statements
    # are missing between "endSection='" and the duration handling. The
    # surviving fragment is reproduced below unchanged instead of being
    # guessed at; restore the method from version control.
    #
    # def getItemsInSection(self, strInput, sectionName, searchIndex):
    #     video=None
    #     startSection='<'+sectionName
    #     endSection='") if -1 != indexDuration:
    #     strDuration=strContainingString[indexDuration:]
    #     strDuration=self.betweenString(strDuration,">","<")
    #     description=description+" - "+strDuration
    #     # Handle video publication
    #     strPublication = ""
indexPublication=strContainingString.find("
") if -1 != indexPublication: strPublication=strContainingString[indexPublication:] strPublication=self.betweenString(strPublication,"") description=description+" ("+strPublication+")" # Handle the icon icon=None indexIcon=strContainingString.find("srcset=") if -1 != indexIcon: icon=strContainingString[indexIcon:] icon=self.betweenString(icon,"\"","\"") splits=icon.split(',') icon=self.betweenString(splits[len(splits)-1],None,'?') icon=icon.strip() description = description.strip() video=Video(description,previewUrl,icon) video.feedTimeOffset=strPublication return video, searchIndex def getVideoIdInSection(self, strInput, sectionName, searchIndex): video=None startSection='<'+sectionName endSection='=length: return str while stringLength < length: sb=sb+filler stringLength=stringLength+1 return sb+str def parseDuration(strDuration): expression=re.compile(r"\d+") result=expression.findall(strDuration) if 2!=len(result): return None, None return pad(result[0],'0',2), pad(result[1],'0',2) # DON'T LEAVE ANYTHING OPEN BELOW THIS LINE BECAUSE THIS FILE IS IMPORTED BY OTHER MODULES AND ANY CODE NOT IN A CLASS WILL BE RUN # strdate = "January 1, 2026" # if DateTimeHelper.canstrptimeex(strdate): # theDate = DateTimeHelper.strptimeex(strdate) # if(not isinstance(theDate,datetime)): # raise Exception('Invalid type for parameter') # feedTimeOffset = "January 13, 2025" # currentTime = datetime.now() # for i in range(1,100): # relativeTime = DateTimeHelper.applyRelativeTime(currentTime,feedTimeOffset) # print(relativeTime) #print(FOX_NEWS_URL) # pathFileName='/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/videodb.txt' # newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/') # newsFeed.ArchiveFile(pathFileName) # pathFileName='/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/videodb.txt' # modifiedTime=os.path.getmtime(pathFileName) # convertTime=time.localtime(modifiedTime) # formatTime=time.strftime('%d%m%Y %H:%M:%S',convertTime) # 
fileDateTime=DateTimeHelper.strptime(formatTime) #fileDateTime=datetime.strptime(formatTime,'%d%m%Y %H:%M:%S') #fileDateTime2=datetime(*(time.strptime(formatTime,'%d%m%Y %H:%M:%S')[0:6])) #currentTime=datetime.now() #Test the main feed # newsFeed=NewsFeed('/home/pi/Projects/Python/NewsFeed/') # newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/videodb.txt') # newsFeed=NewsFeed(PATH_VIDEO_DATABASE, myLog()) # newsFeed=NewsFeed('/home/pi/Projects/Python/NewsFeed/', myLog()) # videos=newsFeed.getItemsInFeed(FOX_NEWS_URL) # for video in videos: # if(video.description.startswith("Martha")): # print(f"Description={video.description}") # print(f"Url={video.url}") # print(f"getTimestamp={video.getTimestamp().toStringMonthDay()}") # print(f"getFeedTimeOffset={video.getFeedTimeOffset()}") # print(f"getFeedTime={video.getFeedTime()}") # print(f"daysOld={(datetime.now()-video.getFeedTime()).days}") # print(' ') # pull the time out of the description and subtract it from the time we scanned the feed. # the result will be the time of the article..use this to sort on. # (i.e.) FeedTime:02/03/2023 12:00:00 Article Time:2 hours ago Real time:10:00:00 #Test the exclusive items feed #newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/') #videos=newsFeed.getExclusiveItemsInFeed("https://www.foxnews.com") # for video in videos: # print(video.description) # Test the U.S. 
Feed # newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/') # videos=newsFeed.getUSItemsInFeed("https://www.foxnews.com/video/topics/us") # for video in videos: # print(video.description) # Test the America's NewsRoom Feed # newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/') # videos=newsFeed.getItemsInAmericasNewsRoomFeed("https://www.foxnews.com/video/shows/americas-newsroom") # print('got {count} videos for America''s Newsroom'.format(count=len(videos))) # for video in videos: # print(video.description) # print(video.url) # Test the Outnumbered Feed # newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/') # videos=newsFeed.getItemsInOutnumbereFeed("https://www.foxnews.com/video/shows/outnumbered") # print('got {count} videos for Outnumbered'.format(count=len(videos))) # for video in videos: # print(video.description) # print(video.url) #minutes, seconds = parseDuration('PT24M5S') #print('Duration is {minutes}:{seconds}'.format(minutes=minutes,seconds=seconds)) # isoDate="2022-10-27T10:24:11Z".replace("Z","+00:00") # articleTime=datetime.datetime.fromisoformat(isoDate) # print('time:{time}'.format(time=articleTime)) # currentTime=Date.getCurrentTime() # print('time:{time}'.format(time=currentTime)) # days, hours, minutes, seconds=Date.deltaTime(articleTime,currentTime) # print('elapsed time {days} days, {hours} hours, {minutes} minutes, {seconds} seconds'.format(days=days,hours=hours,minutes=minutes,seconds=seconds)) # currentTime2=Date.getCurrentTime() # strCurrentTime2=str(currentTime2) # currentTime2=datetime.datetime.fromisoformat(strCurrentTime2) # days, hours, minutes, seconds=Date.deltaTime(currentTime2,currentTime) # print('elapsed time {days} days, {hours} hours, {minutes} minutes, {seconds} seconds'.format(days=days,hours=hours,minutes=minutes,seconds=seconds)) # dateList=[] # currentDate=Date() # dateList.append(currentDate) # currentDate2=Date() # 
dateList.append(currentDate2) # dateList.sort(key=lambda x:x.toString()) # for date in dateList: # print(date.toString()) # #print(dateList)