Jump to content

Module:SocialMediaStats

From Wikipedia, the free encyclopedia

-- scribunto module to get YouTube channel statistics from Wikidata for social media personality infoboxes

require ('strict')
local autoDate = require("Module:Auto date formatter")

-- Wikidata property IDs used throughout this module.
local POINT_IN_TIME_PID = "P585"  -- qualifier read by getClaimDate to date a statement
local YT_CHAN_ID_PID = "P2397"  -- YouTube channel ID; read both as a main value and as a qualifier
local YT_HANDLE_PID = "P11245"  -- YouTube handle; read both as a main value and as a qualifier
local SUB_COUNT_PID = "P8687"  -- property whose statements hold subscriber counts
local VIEW_COUNT_PID = "P5436"  -- property whose statements hold view counts

local p = {} 

-- taken from https://en.wikipedia.org/wiki/Module:Wd
--
-- Splits a date string into numeric year, month and day components.
--
-- dateStr:   date text such as "+2024-05-06T00:00:00Z"; everything from the first
--            "T" or "/" onward is discarded before parsing. A leading "-" is treated
--            as the sign of the year, and a leading "+" on the year is stripped.
--            May be nil.
-- precision: "y" stops after the year, "m" stops after the month; any other value
--            (default "d") parses every component that is present.
--
-- Returns three values: year, month, day. Components that are absent from the
-- string (or beyond the requested precision) are nil; a nil dateStr yields three nils.
local function parseDate(dateStr, precision)
	precision = precision or "d"

	local i, j, index, ptr
	local parts = {nil, nil, nil}

	if dateStr == nil then
		return parts[1], parts[2], parts[3]  -- year, month, day
	end

	-- 'T' for snak values, '/' for outputs with '/Julian' attached
	i, j = dateStr:find("[T/]")

	if i then
		-- keep only the date portion in front of the separator
		dateStr = dateStr:sub(1, i-1)
	end

	-- position from which to search for the first '-' separator
	local from = 1

	if dateStr:sub(1,1) == "-" then
		-- this is a negative number, look further ahead
		from = 2
	end

	-- index: which slot of parts is filled next; ptr: where the next component starts
	index = 1
	ptr = 1

	i, j = dateStr:find("-", from)

	if i then
		-- year
		parts[index] = tonumber(mw.ustring.gsub(dateStr:sub(ptr, i-1), "^%+(.+)$", "%1"), 10)  -- remove '+' sign (explicitly give base 10 to prevent error)

		if parts[index] == -0 then
			parts[index] = tonumber("0")  -- for some reason, 'parts[index] = 0' may actually store '-0', so parse from string instead
		end

		if precision == "y" then
			-- we're done
			return parts[1], parts[2], parts[3]  -- year, month, day
		end

		index = index + 1
		ptr = i + 1

		i, j = dateStr:find("-", ptr)

		if i then
			-- month
			parts[index] = tonumber(dateStr:sub(ptr, i-1), 10)

			if precision == "m" then
				-- we're done
				return parts[1], parts[2], parts[3]  -- year, month, day
			end

			index = index + 1
			ptr = i + 1
		end
	end

	-- whatever remains after the last separator fills the next free slot
	if dateStr:sub(ptr) ~= "" then
		-- day if we have month, month if we have year, or year
		parts[index] = tonumber(dateStr:sub(ptr), 10)
	end

	return parts[1], parts[2], parts[3]  -- year, month, day
end

-- adapted from https://en.wikipedia.org/wiki/Module:Wd
--
-- Reports whether date A lies strictly before date B.
-- Missing months/days are treated as 1. Returns nil when either year is nil,
-- true when A is earlier than B, and false otherwise (including equal dates).
local function datePrecedesDate(aY, aM, aD, bY, bM, bD)
	if aY == nil or bY == nil then
		return nil
	end

	local first = { aY, aM or 1, aD or 1 }
	local second = { bY, bM or 1, bD or 1 }

	-- compare year, then month, then day; the first differing component decides
	for pos = 1, 3 do
		local lhs, rhs = first[pos], second[pos]
		if lhs < rhs then
			return true
		end
		if lhs > rhs then
			return false
		end
	end

	return false
end

-- Extracts the date recorded in a claim's "point in time" qualifier.
--
-- claim: a statement table; its 'qualifiers' field (if any) is keyed by property ID.
--
-- Returns year, month, day as produced by parseDate when the claim carries exactly
-- one point-in-time qualifier that has a time value. Returns nil when the claim has
-- no point-in-time qualifier at all, or when that qualifier has no time value.
-- Raises an error when the point-in-time qualifier list does not contain exactly
-- one entry, since the date would then be ambiguous.
local function getClaimDate(claim)
	if claim['qualifiers'] and claim['qualifiers'][POINT_IN_TIME_PID] then
		local pointsInTime = claim['qualifiers'][POINT_IN_TIME_PID]
		if #pointsInTime ~= 1 then
			-- be conservative in what we accept
			-- the property ID is taken from the constant so the message cannot drift from the code
			error("Encountered a statement with zero or multiple point in time (" .. POINT_IN_TIME_PID .. ") qualifiers. Please add or remove point in time information so each statement has exactly one")
		end
		local pointInTime = pointsInTime[1]
		if pointInTime and
		   pointInTime['datavalue'] and
		   pointInTime['datavalue']['value'] and
		   pointInTime['datavalue']['value']['time']
		then
			return parseDate(pointInTime['datavalue']['value']['time'])
		end
	end
	return nil
end

-- Returns the value of a statement's `qual` qualifier when the statement is not
-- deprecated and carries exactly one instance of that qualifier; otherwise nil.
local function soleQualifierValue(statement, qual)
	if statement['rank'] == "deprecated" then
		return nil
	end
	local allQualifiers = statement['qualifiers']
	local instances = allQualifiers and allQualifiers[qual]
	-- should only have one instance of the qualifier on a statement
	if not instances or #instances ~= 1 then
		return nil
	end
	local datavalue = instances[1]['datavalue']
	return datavalue and datavalue['value']
end

-- Scans a list of statements and returns the most recently dated one whose `qual`
-- qualifier equals targetQualValue.
-- Statements that are deprecated, lack the qualifier, carry it more than once, or
-- have no date (per getClaimDate) are ignored. When two candidates share a date the
-- one visited later wins. Returns nil when nothing qualifies.
local function newestMatchingStatement(statements, qual, targetQualValue)
	local winner = nil
	local winnerYr, winnerMo, winnerDay = nil, nil, nil

	for _, candidate in pairs(statements) do
		local qualValue = soleQualifierValue(candidate, qual)
		if qualValue and qualValue == targetQualValue then
			local yr, mo, day = getClaimDate(candidate)
			-- datePrecedesDate yields nil while no winner exists yet, so the first
			-- dated candidate is always accepted
			if yr and not datePrecedesDate(yr, mo, day, winnerYr, winnerMo, winnerDay) then
				winnerYr, winnerMo, winnerDay = yr, mo, day
				winner = candidate
			end
		end
	end

	return winner
end

-- Finds the newest statement of property `prop` on entity `e` whose `qual`
-- qualifier equals targetQualValue.
-- The entity's best statements are searched first; only when they yield nothing
-- is the search repeated over all statements. Returns the statement or nil.
local function newestMatching(e, prop, qual, targetQualValue)
	local found = newestMatchingStatement(e:getBestStatements(prop), qual, targetQualValue)
	if not found then
		-- nothing among the best statements, so widen the search
		found = newestMatchingStatement(e:getAllStatements(prop), qual, targetQualValue)
	end
	return found or nil
end

-- Returns a new list holding every statement of property `prop` on entity `e`
-- whose rank is not "deprecated".
local function getValidStatements(e, prop)
	local kept = {}
	for _, candidate in pairs(e:getAllStatements(prop)) do
		if candidate['rank'] ~= "deprecated" then
			kept[#kept + 1] = candidate
		end
	end
	return kept
end

-- Resolves the Wikidata entity to read statistics from.
-- Uses the "qid" argument of the frame when it is present and not blank; otherwise
-- falls back to the entity connected to the current page.
-- Returns nil when no entity ID can be determined, and raises an error when an ID
-- was determined but no entity could be loaded for it.
local function getEntity(frame)
	local qid = frame.args and frame.args["qid"]

	local missing = not qid or mw.text.trim(qid) == ""
	if missing then
		qid = mw.wikibase.getEntityIdForCurrentPage()
	end

	if not qid then
		return nil
	end

	local entity = assert(mw.wikibase.getEntity(qid), "No such item found: " .. qid)
	return entity
end

-- Strips a single leading "@" from a channel parameter so handles written as
-- "@name" and "name" compare equal. No lookup is performed here.
-- Returns nil for a nil/false input; any other string is returned unchanged apart
-- from the removed "@".
local function normalizeChannelId(channelParam)
	if not channelParam then
		return nil
	end

	local hasAtPrefix = channelParam:sub(1, 1) == "@"
	return hasAtPrefix and channelParam:sub(2) or channelParam
end

-- Collects the main value of every non-deprecated YouTube channel ID statement on
-- the entity. Statements without a main value are skipped.
local function getAllYtChannelIds(e)
	local ids = {}

	for _, claim in pairs(getValidStatements(e, YT_CHAN_ID_PID)) do
		local snak = claim and claim["mainsnak"]
		local datavalue = snak and snak["datavalue"]
		local id = datavalue and datavalue["value"]
		if id then
			ids[#ids + 1] = id
		end
	end

	return ids
end

-- Returns the main-snak value of a statement, or nil when the statement has none
-- (for example an "unknown value" snak).
local function mainSnakValue(statement)
	local snak = statement and statement["mainsnak"]
	local datavalue = snak and snak["datavalue"]
	return datavalue and datavalue["value"] or nil
end

-- Builds a two-way lookup between YouTube handles and channel IDs for an entity.
--
-- Two sources are combined:
--   1. channel ID statements that carry handle qualifiers, and
--   2. handle statements that carry channel ID qualifiers.
-- Deprecated statements are ignored. Statements whose main value is missing are
-- skipped entirely: there is nothing to pair their qualifiers with, and using the
-- missing value as a table key or string would raise an error.
--
-- Returns a table with two fields:
--   handles    - maps a lower-cased handle to its channel ID
--   channelIds - maps a channel ID to its handle in original case
-- When several pairs share a key, the pair visited last wins.
local function getHandlesToChannelIds(e)
	local mapping = { handles = {}, channelIds = {} }

	-- records one handle <-> channel ID pair in both directions
	local function link(handle, channelId)
		mapping["handles"][handle:lower()] = channelId
		mapping["channelIds"][channelId] = handle
	end

	-- Channel ID statements: the channel ID is the main value, handles are qualifiers.
	for _, chanStatement in pairs(getValidStatements(e, YT_CHAN_ID_PID)) do
		local channelId = mainSnakValue(chanStatement)
		local handleQuals = channelId
			and chanStatement['qualifiers']
			and chanStatement['qualifiers'][YT_HANDLE_PID]
		if handleQuals then
			for _, handleQual in pairs(handleQuals) do
				local handleValue = handleQual['datavalue'] and handleQual['datavalue']['value']
				if handleValue then
					link(handleValue, channelId)
				end
			end
		end
	end

	-- Handle statements: the handle is the main value, channel IDs are qualifiers.
	for _, handleStatement in pairs(getValidStatements(e, YT_HANDLE_PID)) do
		local handleValue = mainSnakValue(handleStatement)
		local chanIdQuals = handleValue
			and handleStatement['qualifiers']
			and handleStatement['qualifiers'][YT_CHAN_ID_PID]
		if chanIdQuals then
			for _, chanIdQual in pairs(chanIdQuals) do
				local channelId = chanIdQual['datavalue'] and chanIdQual['datavalue']['value']
				if channelId then
					link(handleValue, channelId)
				end
			end
		end
	end

	return mapping
end


-- Resolves a user-supplied channel parameter to one of the entity's channel IDs.
-- The parameter is first compared (with and without a leading "@") against the
-- entity's channel IDs; failing that it is treated as a handle and looked up
-- case-insensitively in the handle mapping. Returns the channel ID or nil.
local function findMatchingChannelId(e, channelParam)
	if not channelParam then
		return nil
	end

	local stripped = normalizeChannelId(channelParam)

	-- exact match against the known channel IDs
	for _, knownId in pairs(getAllYtChannelIds(e)) do
		if knownId == stripped or knownId == channelParam then
			return knownId
		end
	end

	-- otherwise assume the parameter is a handle
	local byHandle = getHandlesToChannelIds(e)["handles"]
	return byHandle[stripped:lower()] or nil
end

-- Renders eMessage through the {{error}} template and appends the module's
-- error-tracking category to the result.
local function returnError(frame, eMessage)
	local rendered = frame:expandTemplate{ title = 'error', args = { eMessage } }
	return rendered .. "[[Category:Pages with SocialMediaStats module errors]]"
end

-- Reads the numeric amount stored in a statement's main value.
-- Returns the amount converted with tonumber, or nil when the statement is nil or
-- any level of the mainsnak/datavalue/value/amount chain is missing.
local function getStatisticValue(statement)
	local snak = statement and statement["mainsnak"]
	local datavalue = snak and snak["datavalue"]
	local value = datavalue and datavalue["value"]
	local amount = value and value['amount']

	if amount then
		return tonumber(amount)
	end
	return nil
end

-- Produces the display date for a statement: its point-in-time date is rendered
-- with the {{Format date}} template and then passed through the auto date
-- formatter. Returns nil when the statement is nil or has no date.
local function getFormattedDate(frame, statement)
	if not statement then
		return nil
	end

	local year, month, day = getClaimDate(statement)
	if not year then
		return nil
	end

	local dateArgs = {year, month, day}
	return autoDate._access_archive_format(frame:expandTemplate{title="Format date", args = dateArgs})
end

-- Returns the amount of the newest subscriber-count statement qualified with the
-- given channel ID, or nil when there is none.
local function getSubscriberCount(e, channelId)
	return getStatisticValue(newestMatching(e, SUB_COUNT_PID, YT_CHAN_ID_PID, channelId))
end

-- Returns the amount of the newest view-count statement qualified with the given
-- channel ID, or nil when there is none.
local function getViewCount(e, channelId)
	return getStatisticValue(newestMatching(e, VIEW_COUNT_PID, YT_CHAN_ID_PID, channelId))
end

-- Returns the formatted date of the newest subscriber-count statement for the
-- channel. The view count is assumed to share that date, so only the subscriber
-- statement is consulted.
local function getStatsDate(frame, e, channelId)
	return getFormattedDate(frame, newestMatching(e, SUB_COUNT_PID, YT_CHAN_ID_PID, channelId))
end

-- Reports whether the frame carries at least one named argument other than "qid".
-- Positional arguments (numeric keys, or string keys that parse as numbers) do not
-- count. The argument values are not inspected.
local function passedArgs(frame)
	for name in pairs(frame.args) do
		local isPositional = type(name) == "number" or tonumber(name) ~= nil
		if not isPositional and name ~= "qid" then
			return true
		end
	end
	return false
end


-- Builds the subscriber-count output for up to three channels.
-- Channels are named by the arguments youtube_handle / youtube_id (and the same
-- names suffixed 2 and 3); the handle argument takes priority over the ID argument.
-- Returns "" when no relevant arguments were passed or no entity is available, an
-- error message when none of the channels has a positive subscriber count, the
-- bare formatted count when exactly one channel has data, and otherwise a {{ubl}}
-- list of "count (channel)" entries.
function p.YTsubscribersInt(frame)
	if not passedArgs(frame) then
		return ""
	end

	local e = getEntity(frame)
	if not e then
		return ""
	end

	local entries = {}
	local lastFormatted = nil
	local handleByChannelId = getHandlesToChannelIds(e)["channelIds"]

	-- Check each of the 3 possible channels
	for slot = 1, 3 do
		local suffix = ""
		if slot ~= 1 then
			suffix = tostring(slot)
		end

		local channelParam = frame.args["youtube_handle" .. suffix]
		if channelParam == nil or channelParam == "" then
			channelParam = frame.args["youtube_id" .. suffix]
		end

		local channelId = channelParam and findMatchingChannelId(e, channelParam)
		local subCount = channelId and getSubscriberCount(e, channelId)
		if subCount and subCount > 0 then
			local formatted = frame:expandTemplate{title="Format price", args = {subCount}}

			-- label with the parameter minus "@", or with the mapped handle when
			-- the parameter was a channel ID
			local label = channelParam:gsub("^@", "")
			label = handleByChannelId[label] or label

			entries[#entries + 1] = formatted .. " (" .. label .. ")"
			lastFormatted = formatted
		end
	end

	if #entries == 0 then
		local pieces = {}
		for k, v in pairs(frame.args) do
			pieces[#pieces + 1] = k .. "=" .. v .. "; "
		end
		return returnError(frame, "No subscriber data found for " .. e:getId() .. " with the provided parameters: " .. table.concat(pieces))
	end

	-- If only one result, return it directly
	if #entries == 1 then
		return lastFormatted
	end

	-- Multiple results, use {{ubl}}
	return frame:expandTemplate{title="ubl", args = entries}
end

-- Builds the view-count output for up to three channels.
-- Channels are named by the arguments youtube_handle / youtube_id (and the same
-- names suffixed 2 and 3); the handle argument takes priority over the ID argument.
-- Returns "" when no relevant arguments were passed, no entity is available, or no
-- channel has a positive view count; the bare formatted count when exactly one
-- channel has data; and otherwise a {{ubl}} list of "count (channel)" entries.
function p.YTviewsInt(frame)
	if not passedArgs(frame) then
		return ""
	end

	local e = getEntity(frame)
	if not e then
		return ""
	end

	local entries = {}
	local lastFormatted = nil
	local handleByChannelId = getHandlesToChannelIds(e)["channelIds"]

	-- Check each of the 3 possible channels
	for slot = 1, 3 do
		local suffix = ""
		if slot ~= 1 then
			suffix = tostring(slot)
		end

		local channelParam = frame.args["youtube_handle" .. suffix]
		if channelParam == nil or channelParam == "" then
			channelParam = frame.args["youtube_id" .. suffix]
		end

		local channelId = channelParam and findMatchingChannelId(e, channelParam)
		local viewCount = channelId and getViewCount(e, channelId)
		if viewCount and viewCount > 0 then
			local formatted = frame:expandTemplate{title="Format price", args = {viewCount}}

			-- Remove @ if present for display, then prefer the mapped handle when
			-- the parameter was a channel ID
			local label = channelParam:gsub("^@", "")
			label = handleByChannelId[label] or label

			entries[#entries + 1] = formatted .. " (" .. label .. ")"
			lastFormatted = formatted
		end
	end

	if #entries == 0 then
		return ""
	end

	-- If only one result, return it directly
	if #entries == 1 then
		return lastFormatted
	end

	-- Multiple results, use {{ubl}}
	return frame:expandTemplate{title="ubl", args = entries}
end

-- Returns the formatted date of the statistics for the first of up to three
-- channels (youtube_handle / youtube_id, then the names suffixed 2 and 3) that has
-- a dated subscriber-count statement. Returns "" when no entity is available or no
-- channel yields a date.
function p.YTdateInt(frame)
	local e = getEntity(frame)
	if not e then
		return ""
	end

	-- Try to get date from any available channel
	for slot = 1, 3 do
		local suffix = ""
		if slot ~= 1 then
			suffix = tostring(slot)
		end

		local channelParam = frame.args["youtube_handle" .. suffix]
		if channelParam == nil or channelParam == "" then
			channelParam = frame.args["youtube_id" .. suffix]
		end

		local channelId = channelParam and findMatchingChannelId(e, channelParam)
		local date = channelId and getStatsDate(frame, e, channelId)
		if date then
			return date
		end
	end

	return ""
end

-- Safe wrapper functions
-- Runs p.YTsubscribersInt under pcall so that a raised error is rendered as an
-- error message on the page instead of aborting the invocation.
function p.YTsubscribers(frame)
	local ok, result = pcall(p.YTsubscribersInt, frame)
	if not ok then
		return returnError(frame, result)
	end
	return result
end

-- Runs p.YTviewsInt under pcall so that a raised error is rendered as an error
-- message on the page instead of aborting the invocation.
function p.YTviews(frame)
	local ok, result = pcall(p.YTviewsInt, frame)
	if not ok then
		return returnError(frame, result)
	end
	return result
end

-- Runs p.YTdateInt under pcall so that a raised error is rendered as an error
-- message on the page instead of aborting the invocation.
function p.YTdate(frame)
	local ok, result = pcall(p.YTdateInt, frame)
	if not ok then
		return returnError(frame, result)
	end
	return result
end

return p


--[[
-- useful for debugger testing
local f = mw.getCurrentFrame()
local args = {}
args['qid'] = 'Q111862397'
args['youtube_handle'] = 'LinusTechTips'
f['args'] = args
p.YTsubscribersInt(f)
p.YTviewsInt(f)

local e = mw.wikibase.getEntity('Q57618112')
print(mw.dumpObject(getHandlesToChannelIds(e)))
--]]