# Originating patch (kept for reference):
#   commit:  52fb4f8b0674ea2334f603cf6316a7b56805177c
#   From:    Conrad
#   Date:    Wed, 7 Oct 2020 11:14:35 +0200
#   Subject: [PATCH] Added old steamCrawling file for methods to reuse
#   Files:   src/steam_tools/steamCrawling.py (new file, 464 lines)
#            src/ub/settings.py (2 lines added)

# For communication with the Steam API
# (urllib.error is imported explicitly because this module catches
# urllib.error.HTTPError)
import urllib.error
import urllib.request
import json
# For database handling (importing from Django Models)
from .models import S_Users, S_UserGames, S_Games
# For getting the STEAM_API_KEY
from django.conf import settings
# For storing the API call limit
import os
# For flagging the daily API call limit
from datetime import datetime


# GLOBAL VARIABLES - API URLS
# HTTPS is used because the API key is part of every query string and must
# not travel over the network in clear text.
API_START = 'https://api.steampowered.com/'
# NOTE(review): str() turns a missing key (None) into the literal 'None';
# confirm that settings.STEAM_API_KEY is always configured.
API_KEY = str(settings.STEAM_API_KEY)
API_END_SIN = '&steamid='
API_END_PLU = '&steamids='
API_END_URL = '&vanityurl='

API_COMMAND_PLAYER = 'ISteamUser/GetPlayerSummaries/v0002/?key='
API_COMMAND_FL = 'ISteamUser/GetFriendList/v0001/?key='
API_COMMAND_GAMES = 'IPlayerService/GetOwnedGames/v0001/?key='
API_COMMAND_GAMES_ALL = 'ISteamApps/GetAppList/v0002/?key='
API_COMMAND_RESOLVEURL = 'ISteamUser/ResolveVanityURL/v0001/?key='

# Complete query prefixes; the caller appends the steamid(s) / vanity name
API_QUERY_PLAYER = API_START + API_COMMAND_PLAYER + API_KEY + API_END_PLU
API_QUERY_FRIENDLIST = API_START + API_COMMAND_FL + API_KEY + API_END_SIN
API_QUERY_GAMES = API_START + API_COMMAND_GAMES + API_KEY + API_END_SIN
API_QUERY_ALL_GAMES = API_START + API_COMMAND_GAMES_ALL + API_KEY
API_QUERY_RESOLVEURL = (
    API_START + API_COMMAND_RESOLVEURL + API_KEY + API_END_URL)

# ------------------------------------------------------------
# ------------- HELPER FUNCTIONS -----------------------------
# -------(not called by anything
# except this library) --------
# ------------------------------------------------------------


# Maximum number of Steam API calls this module allows itself per day
API_QUOTA_LIMIT = 100000
# GetPlayerSummaries accepts at most this many steamids per request
MAX_IDS_PER_SUMMARY_CALL = 100


# Returns the path of today's quota file: BASE_DIR/API_QUOTA_<YYYY-MM-DD>
def _quotaFilePath():
    date = datetime.today().strftime('%Y-%m-%d')
    return os.path.join(str(settings.BASE_DIR), 'API_QUOTA_' + date)


# Maps a Steam player summary dict to the extended S_Users field values.
# Fields missing from the API response are stored as False, exactly as the
# rest of this module expects (NULL is used for "not fetched yet", see the
# s_profileurl__isnull filter in updateAllPlayers).
def _playerFields(player_object):
    return {
        's_profilestate': 'profilestate' in player_object,
        's_loccountrycode': player_object.get('loccountrycode', False),
        's_timecreated': player_object.get('timecreated', False),
        's_primaryclanid': player_object.get('primaryclanid', False),
        's_personaname': player_object.get('personaname', False),
        's_profileurl': player_object.get('profileurl', False),
    }


# Writes the API_QUOTA variable to today's quota file. Always returns 0.
def saveAPIquota():
    # The context manager closes the file even if the write fails
    with open(_quotaFilePath(), 'w+') as quota_file:
        quota_file.write(str(API_QUOTA))
    return 0


# Checks the API quota and adds one if it's not over the limit yet.
# Returns 0 when another call is allowed. When the limit is reached the
# counter is saved and the process is terminated via SystemExit, so callers
# never see any other return value.
def checkAPIquota():
    global API_QUOTA

    if API_QUOTA < API_QUOTA_LIMIT:
        API_QUOTA += 1
        return 0

    saveAPIquota()
    # Same effect as the interactive exit() helper, but does not depend on
    # the site module being loaded
    raise SystemExit


# Reads the current API quota for the day from a save file.
# Returns the stored counter, or 0 if there is no readable counter for today
# (missing file, or an empty/corrupt file). Terminates the process via
# SystemExit if the stored counter is already above the limit.
def readAPIquota():
    try:
        with open(_quotaFilePath(), 'r') as quota_file:
            stored_quota = int(quota_file.readline())
    except (IOError, ValueError):
        return 0

    if stored_quota > API_QUOTA_LIMIT:
        raise SystemExit
    return stored_quota


# Daily limit of API calls (number of calls already made today)
API_QUOTA = readAPIquota()


# Returns a draft of a user without actually writing it to the database.
# mode 1: only the steamID is set; mode 2: all extended fields are set.
# Any other mode returns 0.
def draftPlayer(player_object, mode):
    if mode == 1:
        return S_Users(steamID=player_object['steamid'])
    if mode == 2:
        return S_Users(
            steamID=player_object['steamid'],
            **_playerFields(player_object)
        )
    return 0


# Creates a player in the database from a player_object, if they don't
# already exist. mode 1 stores only the steamID, mode 2 also the extended
# fields. Always returns 0.
def createPlayer(player_object, mode):
    if mode == 1:
        S_Users.objects.get_or_create(steamID=player_object['steamid'])
    elif mode == 2:
        # Only the steamID identifies a player. The extended fields go into
        # `defaults` so that an already existing player whose stored data
        # differs is found instead of triggering a duplicate insert.
        S_Users.objects.get_or_create(
            steamID=player_object['steamid'],
            defaults=_playerFields(player_object)
        )
    return 0


# Updates player in the database to new information (Get -> Update -> Save).
# Objects without a 'steamid' key are ignored. Always returns 0.
def updatePlayer(player_object):
    if 'steamid' in player_object:
        player = S_Users.objects.get(steamID=player_object['steamid'])

        new_values = _playerFields(player_object)
        for field_name, field_value in new_values.items():
            setattr(player, field_name, field_value)

        player.save(update_fields=list(new_values))
    return 0


# Get the information from the Steam API for a certain steamid.
# mode 1 only registers the steamid, mode 2 fetches the player summary first.
# Returns 1 if the API request failed or returned no player, otherwise 0.
def getPlayer(steamid, mode):
    if mode == 1:
        createPlayer({'steamid': steamid}, mode)
    elif mode == 2:
        if 0 == checkAPIquota():
            try:
                summary = json.load(urllib.request.urlopen(
                    API_QUERY_PLAYER + str(steamid)))
            except urllib.error.HTTPError:
                return 1

            players = summary['response']['players']
            # Unknown steamids yield an empty player list
            if not players:
                return 1
            createPlayer(players[0], mode)
    return 0


# Imports a list of steamids to the database, uses bulk creation if possible.
# Always returns 0.
def getPlayers(steamid_list, mode):
    # For better performance and stability only keep unique values
    unique_ids = list(set(steamid_list))
    player_cache = []

    if mode == 1:
        # Simple mode: just draft a player for each steamid
        player_cache = [
            draftPlayer({'steamid': steamid}, mode) for steamid in unique_ids
        ]

    elif mode == 2:
        # Extended mode: ask the Steam API for the player objects. The API
        # only accepts a limited number of ids per request, so the list is
        # processed in batches (an empty list costs no API call at all).
        for start in range(0, len(unique_ids), MAX_IDS_PER_SUMMARY_CALL):
            batch = unique_ids[start:start + MAX_IDS_PER_SUMMARY_CALL]
            if 0 != checkAPIquota():
                break
            try:
                query = API_QUERY_PLAYER + \
                    ','.join(str(steamid) for steamid in batch)
                players_object = json.load(urllib.request.urlopen(query))
            except urllib.error.HTTPError:
                # If there are errors with as little as one steamID in the
                # batch then check them separately (slower, but only the
                # problematic ids will be left out)
                for steamid in batch:
                    getPlayer(steamid, mode)
                continue

            player_cache.extend(
                draftPlayer(player_object, mode)
                for player_object in players_object['response']['players']
            )

    S_Users.objects.bulk_create(player_cache, ignore_conflicts=True)
    return 0


# Gets the friendlist of a certain player and checks the friends into the
# database via getPlayers.
# Returns 1 on an HTTP error, the list of friend steamids (as strings) in
# mode 5, and 0 otherwise.
def getFriends(steamid, mode):
    try:
        if 0 == checkAPIquota():
            friendlist = json.load(urllib.request.urlopen(
                API_QUERY_FRIENDLIST + str(steamid) + '&relationship=friend'))

            checklist = [
                str(friend['steamid'])
                for friend in friendlist['friendslist']['friends']
            ]
            getPlayers(checklist, mode)

            if mode == 5:
                return checklist

    except urllib.error.HTTPError:
        return 1
    return 0


# Get the games for a certain
# steamid and save them in the S_UserGames database
# Hint: s_gameCount can have 3 status: -1: No API response, 0: Empty API
# response, any number: number of games
# Always returns 0.
def getGames(player):
    try:
        if 0 == checkAPIquota():
            library = json.load(urllib.request.urlopen(
                API_QUERY_GAMES + str(player.steamID) + '&format=json'))
            response = library['response']

            # If the user doesn't share their library then the response is
            # empty
            if 'game_count' in response:
                # Update Game Count for the player
                player.s_gameCount = response['game_count']
                gamesArray = []
                # 'games' may be missing when the count is 0
                for entry in response.get('games', []):
                    game, _created = S_Games.objects.get_or_create(
                        gameID=entry['appid'])
                    # Create User - Game Relation
                    gamesArray.append(
                        S_UserGames(
                            user=player,
                            game=game,
                            hours=entry['playtime_forever']
                        )
                    )
                S_UserGames.objects.bulk_create(
                    gamesArray, ignore_conflicts=True)
            else:
                player.s_gameCount = 0
    except urllib.error.HTTPError:
        player.s_gameCount = -1

    player.save(update_fields=['s_gameCount'])
    return 0


# Checks if a given variable can be converted to an int or not.
# Values int() cannot handle at all (e.g. None) count as "not an int".
def checkInt(s):
    try:
        int(s)
        return True
    except (TypeError, ValueError):
        return False


# Checks a custom steam url name and returns the steamID or False
def checkCustomURL(url):
    # Local import so this function does not depend on the file's import block
    from urllib.parse import quote

    if 0 != checkAPIquota():
        return False

    try:
        # The name is user input, so it is percent-encoded before it is put
        # into the query string
        name = json.load(urllib.request.urlopen(
            API_QUERY_RESOLVEURL + quote(str(url), safe='')))
    except urllib.error.HTTPError:
        return False

    response = name.get('response', {})
    if response.get('success') == 1:
        return response.get('steamid', False)
    return False


# Converts a given input to a valid steam ID or False.
# Accepted inputs:
#   - a Steam64 ID, i. e. 76561198090620481 (returned as given, strings are
#     stripped of surrounding whitespace)
#   - a profile URL, i. e. https://steamcommunity.com/profiles/76561198090620481/
#   - a custom URL, i. e. https://steamcommunity.com/id/creyD
#   - a custom URL name, i. e. creyd
# Custom URLs and names are resolved through the Steam API.
def getSteamID(input_value):
    if isinstance(input_value, str):
        input_value = input_value.strip()

    # i. e. 76561198090620481
    if checkInt(input_value):
        return input_value

    if not isinstance(input_value, str) or not input_value:
        return False

    # Only treat the input as a URL if it actually contains a path; a bare
    # name that merely starts with "steam" or "http" is a custom URL name
    if input_value.startswith(('http', 'steam')) and '/' in input_value:
        # Drop query string / fragment, then look at the path segments
        path = input_value.split('?', 1)[0].split('#', 1)[0]
        segments = [part for part in path.split('/') if part]

        # https://steamcommunity.com/profiles/76561198090620481[/...]
        if 'profiles' in segments:
            position = segments.index('profiles') + 1
            if position < len(segments) and checkInt(segments[position]):
                return segments[position]
            return False

        # https://steamcommunity.com/id/creyD[/...]
        if 'id' in segments:
            position = segments.index('id') + 1
            if position < len(segments):
                return checkCustomURL(segments[position])
        return False

    # i. e. creyd
    return checkCustomURL(input_value)


# ------------------------------------------------------------
# ------------- PUBLIC FUNCTIONS -----------------------------
# -------(called by anything except this library) ------------
# ------------------------------------------------------------

# Updates all player info in the database (if no data is filled in for the
# player, i.e. s_profileurl is NULL)
# Note: the ids are copied into a list first because the queryset would
# otherwise shrink while players are being updated. Always returns 0.
def updateAllPlayers(pipe):
    batch_size = 100
    userlist = list(S_Users.objects.filter(
        s_profileurl__isnull=True).values_list('steamID', flat=True))

    # range() with a step yields no batch at all for an empty list and never
    # produces an empty trailing batch; the slice end is exclusive, so each
    # batch holds up to batch_size ids
    for start in range(0, len(userlist), batch_size):
        batch = userlist[start:start + batch_size]
        if 0 == checkAPIquota():
            query = API_QUERY_PLAYER + ','.join(str(user) for user in batch)
            players_object = json.load(urllib.request.urlopen(query))

            for player in players_object['response']['players']:
                updatePlayer(player)

    saveAPIquota()
    pipe.nextStep()
    return 0


# Gets the games for each person in the S_Users table which doesn't already
# have a gamecount. Always returns 0.
def updateGames(pipe):
    for user in S_Users.objects.filter(s_gameCount__isnull=True):
        getGames(user)

    saveAPIquota()
    pipe.nextStep()
    return 0


# Gets the names for all games in S_Games table. Always returns 0.
def getAllGames(pipe):
    if 0 == checkAPIquota():
        # Getting the official list of all steam games
        gamelist = json.load(urllib.request.urlopen(API_QUERY_ALL_GAMES))
        apps = gamelist['applist']['apps']

        # If initial games crawling we can use bulk creation because there
        # are no duplicates
        if S_Games.objects.count() == 0:
            games_cache = [
                S_Games(gameID=game['appid'], gameName=game['name'])
                for game in apps
            ]
            S_Games.objects.bulk_create(games_cache, ignore_conflicts=True)
        else:
            for game in apps:
                # Look the game up by its id only and fill in the name, so
                # games that were stored without a name (see getGames) or
                # were renamed are updated instead of inserted a second time
                S_Games.objects.update_or_create(
                    gameID=game['appid'],
                    defaults={'gameName': game['name']})

    saveAPIquota()
    pipe.nextStep()
    return 0


# Crawls the Steam friend graph and stores the found players.
# mode: 1 - Simple Crawl, 2 - Extensive Crawl, 3 - Crawl one level (simple),
#       4 - Crawl one level (extensive), 5 - follow a chain of friends
# startID is the steamid to start from, depth the number of crawl steps,
# pipe an object providing nextStep() (and pipe_steps / save() for modes
# 3 and 4). Always returns 0.
def crawl(startID, depth, pipe, mode):
    CURRENT = list(S_Users.objects.all().values_list('steamID', flat=True))

    if mode in (1, 2):
        # Mode 1 is a basic snowball crawler: for each step in depth it goes
        # over the users in the database and gets their friends
        # Mode 2 is an extensive mode similar to the first mode
        # Users already present before the crawl count as handled
        handled = set(CURRENT)

        getPlayer(str(startID), mode)
        for __ in range(depth):
            snapshot = list(
                S_Users.objects.all().values_list('steamID', flat=True))
            for userid in snapshot:
                if userid not in handled:
                    getFriends(userid, mode)
                    handled.add(userid)
            pipe.nextStep()

    elif mode in (3, 4):
        # Mode 3 + 4 crawl one level with either extensive or simple mode on
        pipe.pipe_steps = 1
        pipe.save(update_fields=['pipe_steps'])

        for userid in CURRENT:
            getFriends(userid, mode - 2)
        pipe.nextStep()

    elif mode == 5:
        idList = []          # found friend ids, in order of discovery
        knownIDs = set()     # same ids, for fast membership tests
        visitedIDs = set()
        failsafecounter = 0
        # Friend ids are handled as strings, so the start id is as well
        currentID = str(startID)

        for __ in range(depth):
            newFriends = getFriends(currentID, 5)
            visitedIDs.add(currentID)
            # getFriends returns 1 instead of a list on HTTP errors
            if isinstance(newFriends, list):
                for item in newFriends:
                    if item not in knownIDs:
                        knownIDs.add(item)
                        idList.append(item)

            # Nothing found so far: there is nowhere to continue
            if not idList:
                break
            currentID = idList[-1]

            if currentID in visitedIDs:
                # Fall back to the earlier finds, one further each time
                fallback = 1 + failsafecounter
                if fallback >= len(idList):
                    break
                currentID = idList[fallback]
                failsafecounter = failsafecounter + 1

        for item in idList:
            createPlayer({'steamid': item}, 1)

        pipe.nextStep()

    saveAPIquota()
    return 0


# Gets Steam Accounts for a string of semicolon separated names.
# Each entry may be anything getSteamID accepts; surrounding whitespace and
# empty entries (e.g. from a trailing semicolon) are ignored.
# Returns (list of steam IDs, error flag); the flag is True if at least one
# entry could not be converted.
def getSteamIDs(string):
    error, steam64IDarray = False, []

    for player in string.split(';'):
        player = player.strip()
        if not player:
            continue
        extractedSteamID = getSteamID(player)
        if extractedSteamID:
            steam64IDarray.append(extractedSteamID)
        else:
            error = True

    return steam64IDarray, error


# Returns the games any given list of players has in common
# Hint: Input format "123456;123456;123456" (semicolon separated, see
# getSteamIDs)
# Returns (list of [gameName, gameID], error flag from getSteamIDs, list of
# the S_Users objects).
def getCommonGames(steamid_list):
    steamIDlist, error = getSteamIDs(steamid_list)
    getPlayers(steamIDlist, 1)

    guys = [S_Users.objects.get(steamID=player) for player in steamIDlist]

    for player in guys:
        getGames(player)

    # None means "no player looked at yet". An empty list is a valid
    # intermediate result and must stay empty for all following players.
    commonGames = None
    for player in guys:
        owned = list(S_UserGames.objects.filter(
            user=player).values_list('game', flat=True))
        if commonGames is None:
            commonGames = owned
        else:
            owned_lookup = set(owned)
            commonGames = [
                game for game in commonGames if game in owned_lookup]

    gameInfoArray = []
    for game in commonGames or []:
        game_record = S_Games.objects.get(gameID=game)
        gameInfoArray.append([game_record.gameName, game_record.gameID])

    return gameInfoArray, error, guys


# ------------------------------------------------------------
# The originating patch also changed src/ub/settings.py (hunk
# "@@ -121,3 +121,5 @@", after the DATA_UPLOAD_MAX_MEMORY_SIZE setting)
# by appending the following line:
#
#     STEAM_API_KEY = os.environ.get("SECRET_KEY")
#
# NOTE(review): the Steam API key is read from the environment variable
# named SECRET_KEY - confirm that this is the intended variable.
# ------------------------------------------------------------