Files
Newsfeed/newsfeed.py

590 lines
23 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import json
import os
import webbrowser
import requests
import traceback
import time
import re
import glob
import shutil
from datetime import timedelta
from datetime import datetime
from datetime import timezone
from environment import *
from utility import *
from video import *
class NewsFeed:
    """Load video listings from news pages and cache them on disk.

    Each public ``get...Feed`` method first tries a cache file located under
    ``pathDb``; when the cache is missing or expired, the page at ``url`` is
    downloaded, scanned with ``Sections`` and the result is written back to
    the cache file.

    ``PathHelper``, ``HttpNetRequest``, ``DateTimeHelper``, ``StringHelper``,
    ``Video`` and the upper-case constants are not defined in this class; they
    presumably come from the star imports at the top of the file.
    """

    def __init__(self, pathDb, logger=None):
        """Remember the cache directory and an optional logger.

        pathDb -- directory (or path prefix) handed to PathHelper.makePathFileName.
        logger -- object with a ``write(message)`` method; when None, messages
                  are printed instead.
        """
        self.pathDb = pathDb
        self.logger = logger

    @staticmethod
    def isResourceAvailable(url):
        """Return True when a HEAD request to ``url`` succeeds within 2.5 seconds.

        Any failure (timeout, connection error, malformed url, non-ok status)
        yields False; this is a best-effort probe and never raises.
        """
        try:
            response = requests.head(url, timeout=2.5)
        except Exception:
            return False
        return bool(response.ok)

    def getItemsInAmericasNewsRoomFeed(self, url):
        """Return the America's Newsroom videos sorted by feed time, oldest first.

        Returns None when the page does not answer with HTTP 200.
        """
        return self._loadArticleFeed(url, VIDEODB_AMERICAS_NEWSROOM_FILENAME, newestFirst=False)

    def getItemsInOutnumberedFeed(self, url):
        """Return the Outnumbered videos sorted by feed time, newest first.

        Returns None when the page does not answer with HTTP 200.
        """
        return self._loadArticleFeed(url, VIDEODB_OUTNUMBERED_FILENAME, newestFirst=True)

    def getItemsInFeed(self, url):
        """Return the main feed videos, newest first.

        Videos older than FEED_REJECT_IF_OLDER_THAN_DAYS are dropped before
        sorting. Returns None when the page does not answer with HTTP 200.
        """
        return self._loadArticleFeed(
            url,
            VIDEODB_FILENAME,
            newestFirst=True,
            maxDays=FEED_REJECT_IF_OLDER_THAN_DAYS,
            verbose=True,
        )

    def filterFeedMaxDays(self, videos, days):
        """Return the videos whose feed time is at most ``days`` whole days old.

        Every decision (included or excluded) is written to the log.
        """
        now = datetime.now()
        filteredList = []
        for video in videos:
            delta = now - video.getFeedTime()
            included = delta.days <= days
            verdict = "INCL." if included else "EXCL."
            message = f"{verdict} days={delta.days},feed time={video.getFeedTime()} feed time offset (strPublication)=:'{video.feedTimeOffset}', description={video.description}"
            self.writeLog(message)
            if included:
                filteredList.append(video)
        return filteredList

    def getUSItemsInFeed(self, url):
        """Return the U.S. feed videos, newest first.

        Each article on the page only carries a video id, so one extra request
        per video is made. Returns None when the listing page does not answer
        with HTTP 200.
        """
        return self._loadVideoPageFeed(url, VIDEODB_US_FILENAME)

    def getExclusiveItemsInFeed(self, url):
        """Return the exclusive feed videos, newest first.

        Works like getUSItemsInFeed but uses its own cache file.
        """
        return self._loadVideoPageFeed(url, VIDEODB_EXCLUSIVE_FILENAME)

    def getItemsInArchiveFeed(self, url, archiveDbFileName):
        """Return the videos stored in an archive file, or None if unreadable.

        ``url`` is accepted for signature parity with the other feed methods
        but is not used; archives are only ever read from disk.
        """
        cachePathFileName = PathHelper.makePathFileName(archiveDbFileName, self.pathDb)
        return self.readFeedCache(cachePathFileName)

    def readFeedCache(self, pathFileName):
        """Read one Video per line from ``pathFileName``.

        Returns the list of videos, or None when the file cannot be read or a
        line cannot be parsed (the traceback is logged).
        """
        try:
            videos = []
            # 'with' will automatically close the stream
            with open(pathFileName, "r", encoding='utf-8') as inputStream:
                for line in inputStream:
                    videos.append(Video.fromString(line))
            return videos
        except Exception:
            self.writeLog(traceback.format_exc())
            return None

    def writeFeedCache(self, pathFileName, videos):
        """Write one line per video to ``pathFileName`` and return ``videos``.

        Writing the cache is best effort: failures are logged and the videos
        are still returned so the caller can use them.
        """
        try:
            # 'with' will automatically close the stream
            with open(pathFileName, "w", encoding='utf-8') as outputStream:
                for video in videos:
                    outputStream.write(video.toString() + "\n")
        except Exception:
            self.writeLog(traceback.format_exc())
        return videos

    def isFeedCacheAvailable(self, pathFileName, expireMinutes):
        """Return True when the cache file exists and is not older than ``expireMinutes``.

        An expired file is archived (see archiveFile) and False is returned.
        Any error is logged and treated as "no cache available".
        """
        try:
            self.writeLog('Inspecting cache file {pathFileName}'.format(pathFileName=pathFileName))
            if not os.path.isfile(pathFileName):
                return False
            fileDateTime = datetime.fromtimestamp(os.path.getmtime(pathFileName))
            elapsed = datetime.now() - fileDateTime
            totalSeconds = int(elapsed.total_seconds())
            hours, remainder = divmod(totalSeconds, 3600)
            minutes = remainder // 60
            self.writeLog('file is = "{age}" hours old'.format(age=hours))
            self.writeLog('file is = "{age}" minutes old'.format(age=minutes))
            # Compare the TOTAL age in minutes. Comparing only the minutes
            # remainder (0-59) let a file that was e.g. 65 minutes old pass a
            # 10 minute expiry.
            if totalSeconds // 60 > expireMinutes:
                self.archiveFile(pathFileName)
                return False
            return True
        except Exception:
            self.writeLog(traceback.format_exc())
            return False

    def archiveFile(self, pathFileName):
        """Copy ``pathFileName`` to ``<name>.txt.<n>`` and remove the original.

        ``n`` starts at the number of existing archives plus one and is
        increased until the target name is unused, so an existing archive is
        never overwritten. Returns False when ``pathFileName`` is not a file,
        True otherwise.
        """
        if not os.path.isfile(pathFileName):
            return False
        archiveFile = StringHelper.betweenString(pathFileName, None, '.txt')
        archivePrefix = archiveFile + '.txt.'
        index = len(glob.glob(archivePrefix + '*')) + 1
        # Gaps in the numbering (manually deleted archives) would otherwise
        # make the count collide with an existing file.
        while os.path.exists(archivePrefix + str(index)):
            index += 1
        archiveFileName = archivePrefix + str(index)
        self.writeLog('archiveFile: Copying "{pathFileName}" to "{archiveFileName}".'.format(pathFileName=pathFileName, archiveFileName=archiveFileName))
        shutil.copy(pathFileName, archiveFileName)
        os.remove(pathFileName)
        return True

    def writeLog(self, message):
        """Send ``message`` to the logger, or print it when no logger was given."""
        if self.logger is not None:
            self.logger.write(message)
        else:
            print(message)

    def _readValidCache(self, cachePathFileName, verbose=False):
        """Return the cached videos, or None when the cache is missing, expired or unreadable."""
        if not self.isFeedCacheAvailable(cachePathFileName, CACHE_EXPIRY_MINS):
            return None
        if verbose:
            self.writeLog(f"Loading videos from cache {cachePathFileName}")
        return self.readFeedCache(cachePathFileName)

    def _loadArticleFeed(self, url, cacheFileName, newestFirst, maxDays=None, verbose=False):
        """Load a feed whose articles carry the video data inline.

        url           -- page to download when the cache cannot be used.
        cacheFileName -- cache file name, resolved against self.pathDb.
        newestFirst   -- sort direction for the feed time.
        maxDays       -- when given, drop videos older than this many days.
        verbose       -- log where the videos are loaded from.
        """
        response = None
        try:
            now = datetime.now()
            cachePathFileName = PathHelper.makePathFileName(cacheFileName, self.pathDb)
            cached = self._readValidCache(cachePathFileName, verbose)
            if cached is not None:
                return cached
            sections = Sections()
            httpNetRequest = HttpNetRequest()
            if verbose:
                self.writeLog(f"Loading videos from {url}")
            response = httpNetRequest.getHttpNetRequest(url)
            if response.status_code != 200:
                return None
            # Read the body once instead of once per scanned article.
            pageText = response.text
            if LOG_HTTP_RESPONSES:
                self.writeLog(url)
                self.writeLog(pageText)
            videos = {}
            searchIndex = 0
            while searchIndex != -1:
                video, searchIndex = sections.getItemsInSection(pageText, "article", searchIndex)
                # The description doubles as the de-duplication key.
                if video is not None and video.description not in videos:
                    videos[video.description] = video
                    video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))
            videoList = list(videos.values())
            if maxDays is not None:
                videoList = self.filterFeedMaxDays(videoList, maxDays)
            videoList = sorted(videoList, key=lambda item: item.getFeedTime(), reverse=newestFirst)
            self.writeFeedCache(cachePathFileName, videoList)
            return videoList
        finally:
            if response is not None:
                response.close()

    def _loadVideoPageFeed(self, url, cacheFileName):
        """Load a feed whose articles only carry a video id.

        The details of every video are fetched from its own page. The result
        is sorted newest first and written to the cache.
        """
        response = None
        try:
            now = datetime.now()
            cachePathFileName = PathHelper.makePathFileName(cacheFileName, self.pathDb)
            cached = self._readValidCache(cachePathFileName)
            if cached is not None:
                return cached
            sections = Sections()
            httpNetRequest = HttpNetRequest()
            response = httpNetRequest.getHttpNetRequest(url)
            if response.status_code != 200:
                return None
            pageText = response.text
            if LOG_HTTP_RESPONSES:
                self.writeLog(url)
                self.writeLog(pageText)
            videos = {}
            searchIndex = 0
            while searchIndex != -1:
                videoId, searchIndex = sections.getVideoIdInSection(pageText, "article", searchIndex)
                if videoId is None:
                    continue
                video = self._fetchVideoPage(sections, 'https://video.foxnews.com/v/' + videoId)
                if video is not None and video.description not in videos:
                    videos[video.description] = video
                    video.setFeedTime(DateTimeHelper.applyRelativeTime(now, video.feedTimeOffset))
            videoList = sorted(videos.values(), key=lambda item: item.getFeedTime(), reverse=True)
            self.writeFeedCache(cachePathFileName, videoList)
            return videoList
        finally:
            if response is not None:
                response.close()

    def _fetchVideoPage(self, sections, videoUrl):
        """Download one video page and return its Video, or None when unavailable."""
        innerResponse = HttpNetRequest().getHttpNetRequest(videoUrl)
        try:
            if innerResponse.status_code != 200:
                return None
            # Read the body BEFORE closing the response, and close it even
            # when parsing raises.
            return sections.getVideoContentInSection(innerResponse.text)
        finally:
            innerResponse.close()
class Sections:
    """String-based scanner for the markup of the news pages.

    No HTML parser is used; all values are located with ``str.find`` and cut
    out with ``StringHelper.betweenString`` (defined outside this file; its
    behaviour when a delimiter is missing is not visible here).
    """

    def __init__(self):
        self.dummy = None

    def getItemsInSection(self, strInput, sectionName, searchIndex):
        """Build a Video from the next ``<sectionName ...>`` element.

        strInput    -- full page text.
        sectionName -- tag name without brackets, e.g. "article".
        searchIndex -- position to start searching from.

        Returns ``(video, nextSearchIndex)``. ``video`` is None when the
        section is unusable (no preview url, tokenised url, no description);
        ``nextSearchIndex`` is -1 when no further section exists.
        """
        strContainingString, searchIndex = self._extractSection(strInput, sectionName, searchIndex)
        if strContainingString is None:
            return None, searchIndex

        indexPreview = strContainingString.find("preview=\"")
        if indexPreview == -1:
            return None, searchIndex
        previewUrl = self.betweenString(strContainingString[indexPreview:], '"', '"')
        if "tokenvod" in previewUrl:
            return None, searchIndex

        # Handle video description
        indexDescription = strContainingString.find("alt=\"")
        if indexDescription == -1:
            return None, searchIndex
        description = self.betweenString(strContainingString[indexDescription:], '"', '"')
        description = self.removeHtml(description)
        description = description.replace("- Fox News", "")
        if "vod.foxbusiness" in description:
            return None, searchIndex

        # Handle video duration
        indexDuration = strContainingString.find("<div class=\"duration\">")
        if indexDuration != -1:
            strDuration = self.betweenString(strContainingString[indexDuration:], ">", "<")
            description = description + " - " + strDuration

        # Handle video publication
        strPublication = ""
        indexPublication = strContainingString.find("<div class=\"pub-date\">")
        if indexPublication != -1:
            strPublication = self.betweenString(strContainingString[indexPublication:], "<time>", "</time>")
            description = description + " (" + strPublication + ")"

        # Handle the icon: take the last srcset candidate and drop its query string.
        icon = None
        indexIcon = strContainingString.find("srcset=")
        if indexIcon != -1:
            icon = self.betweenString(strContainingString[indexIcon:], "\"", "\"")
            splits = icon.split(',')
            icon = self.betweenString(splits[-1], None, '?')
            icon = icon.strip()

        description = description.strip()
        video = Video(description, previewUrl, icon)
        video.feedTimeOffset = strPublication
        return video, searchIndex

    def getVideoIdInSection(self, strInput, sectionName, searchIndex):
        """Return ``(videoId, nextSearchIndex)`` for the next section.

        ``videoId`` is the quoted value following ``data-video-id`` or None
        when the section has no such attribute; ``nextSearchIndex`` is -1 when
        no further section exists.
        """
        strContainingString, searchIndex = self._extractSection(strInput, sectionName, searchIndex)
        if strContainingString is None:
            return None, searchIndex
        indexVideoId = strContainingString.find("data-video-id")
        if indexVideoId == -1:
            return None, searchIndex
        videoId = self.betweenString(strContainingString[indexVideoId:], "\"", "\"")
        return videoId, searchIndex

    def getVideoContentInSection(self, strInput):
        """Build a Video from the metadata embedded in a video page.

        Requires the ``"contentUrl":``, ``"description":`` and
        ``"thumbnailUrl":`` keys; returns None when one of them is missing.
        When a ``"duration"`` key is present and parseable, " - mm:ss" is
        appended to the description.
        """
        strContentUrl = self._quotedValueAfter(strInput, "\"contentUrl\":")
        if strContentUrl is None:
            return None
        strDescription = self._quotedValueAfter(strInput, "\"description\":")
        if strDescription is None:
            return None
        strIcon = self._quotedValueAfter(strInput, "\"thumbnailUrl\":")
        if strIcon is None:
            return None
        strDuration = self._quotedValueAfter(strInput, "\"duration\"")
        if strDuration is not None:
            minutes, seconds = parseDuration(strDuration)
            if minutes is not None and seconds is not None:
                strDescription = strDescription + " - " + minutes + ":" + seconds
        strDescription = strDescription.strip()
        return Video(strDescription, strContentUrl, strIcon)

    def betweenString(self, strItem, strBegin, strEnd):
        """Delegate to StringHelper.betweenString."""
        return StringHelper.betweenString(strItem, strBegin, strEnd)

    def removeHtml(self, strItem):
        """Replace a fixed set of HTML entities in ``strItem``.

        Returns None for None. Note that the curly quote entities
        (&#x2018; / &#x2019;) are removed rather than replaced.
        """
        if strItem is None:
            return None
        for code in ("&#x27;", "&#187;"):
            strItem = strItem.replace(code, "'")
        strItem = strItem.replace("&amp;", "&")
        strItem = strItem.replace("&#x2018;", "")
        strItem = strItem.replace("&#x2019;", "")
        strItem = strItem.replace("&#x2014;", "-")
        strItem = strItem.replace("&#39;", "'")
        strItem = strItem.replace("???", "'")
        strItem = strItem.replace("&quot;", "\"")
        return strItem

    def _extractSection(self, strInput, sectionName, searchIndex):
        """Locate the next ``<sectionName`` ... ``</sectionName`` span.

        Returns ``(sectionText, nextSearchIndex)``; ``(None, -1)`` when no
        opening or closing tag is found at or after ``searchIndex``.
        """
        startSection = '<' + sectionName
        endSection = '</' + sectionName
        startIndex = strInput.find(startSection, searchIndex)
        if startIndex == -1:
            return None, -1
        endIndex = strInput.find(endSection, startIndex)
        if endIndex == -1:
            return None, -1
        nextIndex = endIndex + len(endSection)
        # One extra character is kept so the closing '>' is part of the text.
        return strInput[startIndex:nextIndex + 1], nextIndex

    def _quotedValueAfter(self, strInput, searchItem):
        """Return the stripped quoted value following ``searchItem``, or None when absent."""
        index = strInput.find(searchItem)
        if index == -1:
            return None
        value = self.betweenString(strInput[index + len(searchItem):], "\"", "\"")
        return value.strip()
def pad(str, filler, length):
    """Left-pad ``str`` with ``filler`` until it is ``length`` characters long.

    ``filler`` is repeated once per missing character, so a single-character
    filler yields exactly ``length`` characters. A string that is already long
    enough is returned unchanged.
    """
    missing = length - len(str)
    if missing <= 0:
        return str
    return filler * missing + str
def parseDuration(strDuration):
    """Split a duration string into zero-padded ``(minutes, seconds)`` strings.

    ISO-8601 style values such as 'PT24M5S', 'PT5M', 'PT45S' or 'PT1H2M3S'
    are understood; hours are folded into the minutes. Any other string is
    accepted when it contains exactly two numbers, which are taken as minutes
    and seconds (e.g. '3:07').

    Returns ``(None, None)`` when the value is empty or cannot be parsed.
    """
    if not strDuration:
        return None, None
    isoMatch = re.fullmatch(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?", strDuration.strip())
    if isoMatch is not None and any(isoMatch.groups()):
        hours, minutes, seconds = (int(part) if part else 0 for part in isoMatch.groups())
        # Callers render "minutes:seconds", so hours are expressed as minutes.
        return str(hours * 60 + minutes).rjust(2, '0'), str(seconds).rjust(2, '0')
    result = re.findall(r"\d+", strDuration)
    if len(result) != 2:
        return None, None
    return result[0].rjust(2, '0'), result[1].rjust(2, '0')
# DON'T LEAVE ANYTHING OPEN BELOW THIS LINE BECAUSE THIS FILE IS IMPORTED BY OTHER MODULES AND ANY CODE NOT IN A CLASS WILL BE RUN
# strdate = "January 1, 2026"
# if DateTimeHelper.canstrptimeex(strdate):
# theDate = DateTimeHelper.strptimeex(strdate)
# if(not isinstance(theDate,datetime)):
# raise Exception('Invalid type for parameter')
# feedTimeOffset = "January 13, 2025"
# currentTime = datetime.now()
# for i in range(1,100):
# relativeTime = DateTimeHelper.applyRelativeTime(currentTime,feedTimeOffset)
# print(relativeTime)
#print(FOX_NEWS_URL)
# pathFileName='/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/videodb.txt'
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/')
# newsFeed.archiveFile(pathFileName)
# pathFileName='/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/videodb.txt'
# modifiedTime=os.path.getmtime(pathFileName)
# convertTime=time.localtime(modifiedTime)
# formatTime=time.strftime('%d%m%Y %H:%M:%S',convertTime)
# fileDateTime=DateTimeHelper.strptime(formatTime)
#fileDateTime=datetime.strptime(formatTime,'%d%m%Y %H:%M:%S')
#fileDateTime2=datetime(*(time.strptime(formatTime,'%d%m%Y %H:%M:%S')[0:6]))
#currentTime=datetime.now()
#Test the main feed
# newsFeed=NewsFeed('/home/pi/Projects/Python/NewsFeed/')
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/videodb.txt')
# newsFeed=NewsFeed(PATH_VIDEO_DATABASE, myLog())
# newsFeed=NewsFeed('/home/pi/Projects/Python/NewsFeed/', myLog())
# videos=newsFeed.getItemsInFeed(FOX_NEWS_URL)
# for video in videos:
# if(video.description.startswith("Martha")):
# print(f"Description={video.description}")
# print(f"Url={video.url}")
# print(f"getTimestamp={video.getTimestamp().toStringMonthDay()}")
# print(f"getFeedTimeOffset={video.getFeedTimeOffset()}")
# print(f"getFeedTime={video.getFeedTime()}")
# print(f"daysOld={(datetime.now()-video.getFeedTime()).days}")
# print(' ')
# pull the time out of the description and subtract it from the time we scanned the feed.
# the result will be the time of the article..use this to sort on.
# (i.e.) FeedTime:02/03/2023 12:00:00 Article Time:2 hours ago Real time:10:00:00
#Test the exclusive items feed
#newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/')
#videos=newsFeed.getExclusiveItemsInFeed("https://www.foxnews.com")
# for video in videos:
# print(video.description)
# Test the U.S. Feed
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/')
# videos=newsFeed.getUSItemsInFeed("https://www.foxnews.com/video/topics/us")
# for video in videos:
# print(video.description)
# Test the America's NewsRoom Feed
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/')
# videos=newsFeed.getItemsInAmericasNewsRoomFeed("https://www.foxnews.com/video/shows/americas-newsroom")
# print('got {count} videos for America''s Newsroom'.format(count=len(videos)))
# for video in videos:
# print(video.description)
# print(video.url)
# Test the Outnumbered Feed
# newsFeed=NewsFeed('/home/pi/.kodi/addons/plugin.video.fox.news/resources/lib/')
# videos=newsFeed.getItemsInOutnumberedFeed("https://www.foxnews.com/video/shows/outnumbered")
# print('got {count} videos for Outnumbered'.format(count=len(videos)))
# for video in videos:
# print(video.description)
# print(video.url)
#minutes, seconds = parseDuration('PT24M5S')
#print('Duration is {minutes}:{seconds}'.format(minutes=minutes,seconds=seconds))
# isoDate="2022-10-27T10:24:11Z".replace("Z","+00:00")
# articleTime=datetime.datetime.fromisoformat(isoDate)
# print('time:{time}'.format(time=articleTime))
# currentTime=Date.getCurrentTime()
# print('time:{time}'.format(time=currentTime))
# days, hours, minutes, seconds=Date.deltaTime(articleTime,currentTime)
# print('elapsed time {days} days, {hours} hours, {minutes} minutes, {seconds} seconds'.format(days=days,hours=hours,minutes=minutes,seconds=seconds))
# currentTime2=Date.getCurrentTime()
# strCurrentTime2=str(currentTime2)
# currentTime2=datetime.datetime.fromisoformat(strCurrentTime2)
# days, hours, minutes, seconds=Date.deltaTime(currentTime2,currentTime)
# print('elapsed time {days} days, {hours} hours, {minutes} minutes, {seconds} seconds'.format(days=days,hours=hours,minutes=minutes,seconds=seconds))
# dateList=[]
# currentDate=Date()
# dateList.append(currentDate)
# currentDate2=Date()
# dateList.append(currentDate2)
# dateList.sort(key=lambda x:x.toString())
# for date in dateList:
# print(date.toString())
# #print(dateList)