Модуль:WDSource

Версия от 18:36, 21 июня 2023; Admin (обсуждение | вклад) (1 версия импортирована)
(разн.) ← Предыдущая версия | Текущая версия (разн.) | Следующая версия → (разн.)

Для документации этого модуля может быть создана страница Модуль:WDSource/doc

require('strict')

local p = {}
local wikidata = require('Модуль:WDCommon')
local wdLang = require('Модуль:WDLang')
local backend = require('Модуль:WDBackend')

local cache = require('Модуль:WDSource/Cache')

local langCache = cache.LangCache
local idCache = cache.IdCache

-- Wikidata property identifiers ("P…" strings) referenced by the field maps
-- and helper functions below. Each constant is used as a `property`,
-- `properties` or `entity` value in a descriptor table handed to the backend.
local P_SPECIFIED_AS = 'P1932'
local P_NAMED_AS = 'P1810'
local P_LANG_CODE = 'P218'
local P_WORK_LANG = 'P407'
local P_SUBCLASS_OF = 'P279'
local P_INSTANCE_OF = 'P31'
local P_TITLE = 'P1476'
local P_SUBTITLE = 'P1680'
local P_EDITION_AS_STR = 'P9767'
local P_GENRE = 'P136'
local P_WORK_FORM = 'P7937'
local P_INSCRIPTION = 'P1684'
local P_INSCRIPTION_MENTIONS = 'P6568'
local P_TRANSLATION_OF = 'P9745'
local P_PARTS_COUNT = 'P2635'
local P_AUTHORS = 'P50'
local P_AUTHORS_AS_STR = 'P2093'
local P_ILLUSTRATORS = 'P110'
local P_EDITOR_IN_CHIEF = 'P5769'
local P_EDITORS = 'P98'
local P_TRANSLATORS = 'P655'
local P_CONTAINS = 'P4330'
local P_DATE = 'P577'
local P_START_DATE = 'P580'
local P_END_DATE = 'P582'
local P_URL = 'P953'
local P_URL_STATUS = 'P6954'
local P_ARCHIVE_URL = 'P1065'
local P_ARCHIVE_DATE = 'P2960'
local P_URL_MASK = 'P1630'
local P_SEARCH_URL_MASK = 'P4354'
local P_ISSN = 'P236'
local P_ISBN_13 = 'P212'
local P_ISBN_10 = 'P957'
local P_PUBLISHED_IN = 'P1433'
local P_VOLUME = 'P478'
local P_ISSUE = 'P433'
local P_PAGES = 'P304'
local P_PAGES_COUNT = 'P1104'
local P_PUBLISHER = 'P123'
local P_LOCATION = 'P291'
local P_ARTICLE_ID = 'P2322'
local P_DOI = 'P356'
-- NOTE(review): the identifier name may be a typo (the field it feeds is
-- called 's2sic' in workVersionMap) — confirm before renaming anything.
local P_S2SIC = 'P8299'
local P_OCLC = 'P243'
local P_EDITION_OF = 'P629'
local P_PART_OF = 'P361'
local P_OF_SERIES = 'P179'
local P_GOOGLE_BOOKS_ID = 'P675'
local P_PMID = 'P698'
local P_PMC_ID = 'P932'
local P_PRODUCES = 'P1056'
local P_ROLE = 'P2868'
local P_DEDICATED_TO = 'P825'
local P_FAMILY_NAME = 'P734'
local P_GIVEN_NAME = 'P735'
local P_ANCESTOR_NAME = 'P5056'
local P_WIKIDATA_PROP = 'P1687'

-- Entity ids accepted for the 'contentType' field. Shared as the
-- `allowedEntities` list by the 'contentType' descriptors in publishedInMap
-- and workVersionMap.
local contentTypeEntities = { 'Q30070675', 'Q108676767', 'Q478798', 'Q60533375', 'Q11424', 'Q187947', 'Q2376293' }

-- Sub-field descriptors used as the `get` list of every contributor field
-- (authors, illustrators, editors, editor-in-chief, translators). They request
-- the family name, given name(s) and ancestor name of a contributor.
-- Only 'givenName' has no `max = 1`.
-- NOTE(review): the meaning of `max` and `isLocal` is defined by WDBackend,
-- which is not visible here — confirm there.
local contributorComponentsMap = {
	{
		name = 'familyName',
		property = P_FAMILY_NAME,
		max = 1,
		isLocal = true,
	},
	{
		name = 'givenName',
		property = P_GIVEN_NAME,
		isLocal = true,
	},
	{
		name = 'ancestorName',
		property = P_ANCESTOR_NAME,
		max = 1,
		isLocal = true,
	},
}

-- Qualifier descriptor reading P_SPECIFIED_AS. It carries the flags
-- `overwriteValue` and `exact`; most fields that use it also set
-- `skipGetIf = { exact = true }`.
-- NOTE(review): presumably the qualifier text replaces the statement value and
-- marks it as exact so the `get` sub-fetch is skipped — confirm in WDBackend.
local exactQualifier = {
	property = P_SPECIFIED_AS,
	overwriteValue = true,
	exact = true,
	max = 1,
}

-- Descriptor for the language of the original of the containing publication.
-- It is used as the `get` entry of both 'publishedInOrigin' descriptors in
-- publishedInMap. It reads P_WORK_LANG into 'publishedInOriginLang' and that
-- language's P_LANG_CODE into 'publishedInOriginLangCode'. The `elseGet`
-- branch repeats the lookup on the P_SUBCLASS_OF target.
-- NOTE(review): `elseGet` presumably runs only when the code lookup yields
-- nothing — confirm in WDBackend.
local publishedInOriginLang = {
	name = 'publishedInOriginLang',
	property = P_WORK_LANG,
	get = {
		{
			name = 'publishedInOriginLangCode',
			property = P_LANG_CODE,
			cache = langCache,
			max = 1,
			elseGet = {
				-- for ethnolects determine parent language
				{
					name = 'publishedInOriginLang',
					property = P_SUBCLASS_OF,
					overwrite = true,
					max = 1,
					get = {
						{
							name = 'publishedInOriginLangCode',
							property = P_LANG_CODE,
							cache = langCache,
							overwrite = true,
							max = 1,
						},
					},
				},
			},
		},
	},
}

-- Field descriptors for the containing publication. p.fetch passes this list
-- as the `get` of the 'publishedIn' entry.
-- Several names occur more than once with different properties
-- ('publishedInWorkType', 'publishedInIsHosting', 'contentType',
-- 'publishedInOrigin', 'urlMask').
-- NOTE(review): presumably later entries act as fallbacks or additions for the
-- same field, which would make the entry order significant — confirm in
-- WDBackend before reordering.
local publishedInMap = {
	{
		name = 'publishedIn',
		getValue = wikidata.name,
	},
	{
		name = 'publishedInSubtitle',
		property = P_SUBTITLE,
		filter = wikidata.base.tryFilterStatementsByLang,
	},
	{
		name = 'edition',
		property = P_EDITION_AS_STR,
		max = 1,
	},
	-- Classification flags derived from P_INSTANCE_OF / P_PRODUCES / P_WORK_FORM.
	{
		name = 'publishedInWorkType',
		property = P_INSTANCE_OF,
		allowedEntities = { 'Q1404878', 'Q83790', 'Q2250844', 'Q13136', 'Q23622', 'Q5292', 'Q615699', 'Q5633421' },
	},
	{
		name = 'publishedInIsHosting',
		property = P_INSTANCE_OF,
		allowedEntities = { 'Q559856', 'Q15590336', 'Q3220391' },
	},
	{
		name = 'publishedInIsHosting',
		property = P_PRODUCES,
		allowedEntities = { 'Q559856' },
	},
	{
		name = 'publishedInEditionType',
		property = P_INSTANCE_OF,
		allowedEntities = { 'Q3331189', 'Q1238720', 'Q571' },
	},
	{
		name = 'publishedInWorkType',
		property = P_WORK_FORM,
	},
	{
		name = 'publishedInIsPeriodic',
		property = P_INSTANCE_OF,
		allowedEntities = { 'Q5633421', 'Q847906', 'Q41298', 'Q11032', 'Q1110794', 'Q737498' },
	},
	-- Contributors of the containing publication.
	{
		name = 'publishedInAuthors',
		properties = { P_AUTHORS, P_AUTHORS_AS_STR },
		isArray = true,
		qualifiers = { exactQualifier },
		skipGetIf = { exact = true },
		get = contributorComponentsMap,
	},
	{
		name = 'publishedInIllustrators',
		property = P_ILLUSTRATORS,
		isArray = true,
		qualifiers = { exactQualifier },
		skipGetIf = { exact = true },
		get = contributorComponentsMap,
	},
	{
		name = 'isLikeBook',
		property = P_INSTANCE_OF,
		allowedEntities = { 'Q571', 'Q128093', 'Q5292' },
	},
	-- 'contentType' is looked up first on the P_PRODUCES target, then on the
	-- publication itself.
	{
		property = P_PRODUCES,
		get = {
			{
				name = 'contentType',
				property = P_CONTAINS,
				allowedEntities = contentTypeEntities,
			},
		},
	},
	{
		name = 'contentType',
		property = P_CONTAINS,
		allowedEntities = contentTypeEntities,
	},
	-- Original of the publication: P_TRANSLATION_OF first, then P_EDITION_OF.
	{
		name = 'publishedInOrigin',
		property = P_TRANSLATION_OF,
		getValue = wikidata.name,
		get = {
			publishedInOriginLang
		},
	},
	{
		name = 'publishedInOrigin',
		property = P_EDITION_OF,
		getValue = wikidata.name,
		get = {
			publishedInOriginLang
		},
	},
	-- URL mask: taken from the property referenced by P_WIKIDATA_PROP, with a
	-- second 'urlMask' entry reading P_SEARCH_URL_MASK from the publication.
	{
		name = 'idType',
		property = P_WIKIDATA_PROP,
		max = 1,
		get = {
			{
				name = 'urlMask',
				property = P_URL_MASK,
				max = 1,
			},
		},
	},
	{
		name = 'urlMask',
		property = P_SEARCH_URL_MASK,
		max = 1,
	},
	{
		name = 'publishedInUrl',
		property = P_URL,
		max = 1,
		qualifiers = {
			{
				name = 'publishedInArchiveUrl',
				property = P_ARCHIVE_URL,
				max = 1,
			},
			{
				name = 'publishedInArchiveDate',
				property = P_ARCHIVE_DATE,
				max = 1,
			},
			{
				name = 'publishedInUrlStatus',
				property = P_URL_STATUS,
				max = 1,
			},
		},
	},
	{
		name = 'publishedInTranslators',
		property = P_TRANSLATORS,
		isArray = true,
		get = contributorComponentsMap,
	},
	-- NOTE(review): unlike every other field that uses exactQualifier, this one
	-- has no `skipGetIf = { exact = true }` (workVersionMap's 'editorInChief'
	-- does) — confirm whether the omission is intentional.
	{
		name = 'publishedInEditorInChief',
		property = P_EDITOR_IN_CHIEF,
		isArray = true,
		qualifiers = { exactQualifier },
		get = contributorComponentsMap,
	},
	{
		name = 'publishedInEditors',
		property = P_EDITORS,
		isArray = true,
		qualifiers = { exactQualifier },
		skipGetIf = { exact = true },
		get = contributorComponentsMap,
	},
	-- ISBN-13 with publication details taken from its qualifiers; ISBN-10 with
	-- the same qualifiers is listed under `elseGet`.
	{
		name = 'isbn',
		property = P_ISBN_13,
		match = true,
		qualifiers = {
			{
				name = 'date',
				property = P_DATE,
				max = 1,
			},
			{
				name = 'location',
				property = P_LOCATION,
			},
			{
				name = 'publisher',
				property = P_PUBLISHER,
				max = 1,
			},
			{
				name = 'pagesCount',
				property = P_PAGES_COUNT,
				defaultUnit = 'Q1069725',
				max = 1,
			},
		},
		elseGet = {
			{
				name = 'isbn',
				property = P_ISBN_10,
				qualifiers = {
					{
						name = 'date',
						property = P_DATE,
						max = 1,
					},
					{
						name = 'location',
						property = P_LOCATION,
					},
					{
						name = 'publisher',
						property = P_PUBLISHER,
						max = 1,
					},
					{
						name = 'pagesCount',
						property = P_PAGES_COUNT,
						defaultUnit = 'Q1069725',
						max = 1,
					},
				},
			},
		},
	},
	{
		name = 'oclc',
		property = P_OCLC,
	},
	-- Publication details read directly from the publication's statements.
	{
		name = 'date',
		property = P_DATE,
	},
	{
		name = 'location',
		property = P_LOCATION,
	},
	{
		name = 'publisher',
		property = P_PUBLISHER,
		qualifiers = {
			-- Same content as the shared `exactQualifier` table, written inline.
			{
				property = P_SPECIFIED_AS,
				overwriteValue = true,
				exact = true,
				max = 1,
			},
		},
	},
	{
		name = 'pagesCount',
		property = P_PAGES_COUNT,
		defaultUnit = 'Q1069725',
		max = 1,
	},
	{
		name = 'issn',
		property = P_ISSN,
	},
	{
		name = 'publishedInPartsCount',
		property = P_PARTS_COUNT,
		allowedUnits = { 'Q1238720' },
		max = 1,
	},
}

-- Field descriptors for the abstract work. p.fetch passes this list as the
-- `get` of the 'work' entry. Both descriptors write to the array field
-- 'detectedInfo', one from P_INSTANCE_OF and one from P_GENRE, with the same
-- allowed entity list. removeInfoDuplicates later prunes 'detectedInfo'.
local workMap = {
	{
		name = 'detectedInfo',
		property = P_INSTANCE_OF,
		isArray = true,
		allowedEntities = { 'Q1404878', 'Q83790', 'Q2250844', 'Q13136', 'Q23622', 'Q5292', 'Q615699', 'Q5633421' },
	},
	{
		name = 'detectedInfo',
		property = P_GENRE,
		isArray = true,
		allowedEntities = { 'Q1404878', 'Q83790', 'Q2250844', 'Q13136', 'Q23622', 'Q5292', 'Q615699', 'Q5633421' },
	},
}

-- Field descriptors for the cited item itself. p.fetch passes this list as
-- the `get` of the first 'workVersion' entry.
-- Some names occur more than once ('workType', 'origin', 'publishedIn').
-- NOTE(review): presumably later entries act as fallbacks or additions for the
-- same field, which would make the entry order significant — confirm in
-- WDBackend before reordering.
local workVersionMap = {
	-- Classification flags derived from P_INSTANCE_OF / P_WORK_FORM.
	{
		name = 'isArticle',
		property = P_INSTANCE_OF,
		allowedEntities = { 'Q13442814', 'Q191067' },
	},
	{
		name = 'isIssue',
		property = P_INSTANCE_OF,
		allowedEntities = { 'Q28869365', 'Q60534442' },
	},
	{
		name = 'isVolume',
		property = P_INSTANCE_OF,
		allowedEntities = { 'Q1238720' },
	},
	{
		name = 'contentType',
		property = P_INSTANCE_OF,
		allowedEntities = contentTypeEntities,
	},
	{
		name = 'workType',
		property = P_INSTANCE_OF,
		allowedEntities = { 'Q1404878', 'Q83790', 'Q2250844', 'Q13136', 'Q23622', 'Q5292', 'Q615699', 'Q5633421' },
	},
	{
		name = 'workType',
		property = P_WORK_FORM,
		max = 1,
	},
	-- ISBN-13 with publication details taken from its qualifiers; ISBN-10 is
	-- listed under `elseGet`.
	{
		name = 'isbn',
		property = P_ISBN_13,
		match = true,
		qualifiers = {
			{
				name = 'date',
				property = P_DATE,
				max = 1,
			},
			{
				name = 'location',
				property = P_LOCATION,
			},
			-- NOTE(review): this qualifier descriptor carries nested
			-- `qualifiers`, while the ISBN-10 branch below and publishedInMap
			-- use `max = 1` at this position — confirm which form is intended.
			{
				name = 'publisher',
				property = P_PUBLISHER,
				qualifiers = {
					{
						property = P_SPECIFIED_AS,
						overwriteValue = true,
						exact = true,
						max = 1,
					},
				},
			},
			{
				name = 'pagesCount',
				property = P_PAGES_COUNT,
				defaultUnit = 'Q1069725',
				max = 1,
			},
		},
		elseGet = {
			{
				name = 'isbn',
				property = P_ISBN_10,
				qualifiers = {
					{
						name = 'date',
						property = P_DATE,
						max = 1,
					},
					{
						name = 'location',
						property = P_LOCATION,
					},
					{
						name = 'publisher',
						property = P_PUBLISHER,
						max = 1,
					},
					{
						name = 'pagesCount',
						property = P_PAGES_COUNT,
						defaultUnit = 'Q1069725',
						max = 1,
					},
				},
			},
		},
	},
	-- Contributors.
	{
		name = 'authors',
		properties = { P_AUTHORS, P_AUTHORS_AS_STR },
		isArray = true,
		qualifiers = { exactQualifier },
		skipGetIf = { exact = true },
		get = contributorComponentsMap,
	},
	{
		name = 'illustrators',
		property = P_ILLUSTRATORS,
		isArray = true,
		qualifiers = { exactQualifier },
		skipGetIf = { exact = true },
		get = contributorComponentsMap,
	},
	-- Title block.
	{
		name = 'title',
		getValue = wikidata.name,
	},
	{
		name = 'subtitle',
		property = P_SUBTITLE,
		filter = wikidata.base.tryFilterStatementsByLang,
	},
	{
		name = 'edition',
		property = P_EDITION_AS_STR,
		max = 1,
	},
	-- Inscriptions restricted by `has` to statements whose P_ROLE is
	-- 'Q116158574'; the P_INSCRIPTION_MENTIONS qualifier is flagged
	-- `overwriteEntity`. removeInfoDuplicates compares these entries' `entity`
	-- against 'detectedInfo'.
	{
		name = 'info',
		property = P_INSCRIPTION,
		has = {
			{ property=P_ROLE, value='Q116158574' },
		},
		isArray = true,
		qualifiers = {
			{
				property = P_INSCRIPTION_MENTIONS,
				overwriteEntity = true,
			},
		},
	},
	{
		name = 'editorInChief',
		property = P_EDITOR_IN_CHIEF,
		isArray = true,
		qualifiers = { exactQualifier },
		skipGetIf = { exact = true },
		get = contributorComponentsMap,
	},
	{
		name = 'editors',
		property = P_EDITORS,
		isArray = true,
		qualifiers = { exactQualifier },
		skipGetIf = { exact = true },
		get = contributorComponentsMap,
	},
	{
		name = 'translators',
		property = P_TRANSLATORS,
		isArray = true,
		get = contributorComponentsMap,
	},
	-- Position within the containing publication.
	{
		name = 'volume',
		property = P_VOLUME,
		qualifiers = {
			-- NOTE(review): both qualifiers read P_TITLE, so 'partTitle' and
			-- 'volumeTitle' appear to receive the same value — confirm intent.
			{
				name = 'partTitle',
				property = P_TITLE,
				max = 1,
			},
			{
				name = 'volumeTitle',
				property = P_TITLE,
				max = 1,
			},
		},
	},
	{
		name = 'issue',
		property = P_ISSUE,
	},
	{
		name = 'date',
		property = P_DATE,
		qualifiers = {
			{
				name = 'startDate',
				property = P_START_DATE,
				max = 1,
			},
			{
				name = 'endDate',
				property = P_END_DATE,
				max = 1,
			},
		},
	},
	{
		name = 'pages',
		property = P_PAGES,
	},
	{
		name = 'pagesCount',
		property = P_PAGES_COUNT,
		defaultUnit = 'Q1069725',
	},
	{
		name = 'articleId',
		property = P_ARTICLE_ID,
	},
	-- Full-text URL with archive information; fetchUrl reads 'url' and
	-- 'urlStatus' afterwards.
	{
		name = 'url',
		property = P_URL,
		max = 1,
		qualifiers = {
			{
				name = 'archiveUrl',
				property = P_ARCHIVE_URL,
				max = 1,
			},
			{
				name = 'archiveDate',
				property = P_ARCHIVE_DATE,
				max = 1,
			},
			{
				name = 'urlStatus',
				property = P_URL_STATUS,
				max = 1,
			},
		},
	},
	{
		name = 'location',
		property = P_LOCATION,
	},
	{
		name = 'publisher',
		property = P_PUBLISHER,
		qualifiers = {
			-- Same content as the shared `exactQualifier` table, written inline.
			{
				property = P_SPECIFIED_AS,
				overwriteValue = true,
				exact = true,
				max = 1,
			},
		}
	},
	-- Original of the item: P_TRANSLATION_OF first (with subtitle and volume
	-- details), then P_EDITION_OF (language only).
	{
		name = 'origin',
		property = P_TRANSLATION_OF,
		getValue = wikidata.name,
		get = {
			{
				name = 'originLang',
				property = P_WORK_LANG,
				get = {
					{
						name = 'langCode',
						property = P_LANG_CODE,
						cache = langCache,
						max = 1,
					}
				},
			},
			{
				name = 'originSubtitle',
				property = P_SUBTITLE,
				max = 1,
			},
			{
				name = 'originVolume',
				property = P_VOLUME,
				max = 1,
				qualifiers = {
					{
						name = 'originVolumeTitle',
						property = P_TITLE,
						max = 1,
					},
				},
			},
		},
	},
	{
		name = 'origin',
		property = P_EDITION_OF,
		getValue = wikidata.name,
		get = {
			{
				name = 'originLang',
				property = P_WORK_LANG,
				get = {
					{
						name = 'langCode',
						property = P_LANG_CODE,
						cache = langCache,
						max = 1,
					}
				},
			},
		},
	},
	{
		name = 'partsCount',
		property = P_PARTS_COUNT,
		max = 1,
	},
	{
		name = 'dedicatedTo',
		property = P_DEDICATED_TO,
	},
	-- External identifiers.
	{
		name = 'doi',
		property = P_DOI,
	},
	{
		name = 'oclc',
		property = P_OCLC,
	},
	{
		name = 'pmid',
		property = P_PMID,
	},
	-- The nested 'urlMask' reads P_URL_MASK from the P_PMC_ID property entity;
	-- fetchUrl reads it back via `pmc.components.urlMask`.
	{
		name = 'pmc',
		property = P_PMC_ID,
		get = {
			{
				name = 'urlMask',
				entity = P_PMC_ID,
				property = P_URL_MASK,
				cache = idCache,
				max = 1,
				isLocal = true,
			},
		},
	},
	{
		name = 's2sic',
		property = P_S2SIC,
	},
	-- Containing publication via P_PUBLISHED_IN, with position details read
	-- from the statement's qualifiers.
	{
		name = 'publishedIn',
		property = P_PUBLISHED_IN,
		getValue = wikidata.name,
		max = 1,
		qualifiers = {
			-- probably, wrong way, different publications must have different items
			{
				name = 'volume',
				property = P_VOLUME,
			},
			{
				name = 'issue',
				property = P_ISSUE,
			},
			{
				name = 'date',
				property = P_DATE,
			},
			{
				name = 'startDate',
				property = P_START_DATE,
				max = 1,
			},
			{
				name = 'endDate',
				property = P_END_DATE,
				max = 1,
			},
			{
				name = 'pages',
				property = P_PAGES,
			},
			{
				name = 'articleId',
				property = P_ARTICLE_ID,
			},
		},
	},
	-- The abstract work; p.fetch later processes 'work' with workMap.
	{
		name = 'work',
		property = P_EDITION_OF,
		max = 1,
	},
	-- Second 'publishedIn' entry: container via P_PART_OF.
	{
		name = 'publishedIn',
		property = P_PART_OF,
		getValue = wikidata.name,
		max = 1,
		get = {
			{
				name = 'partsCount',
				property = P_PARTS_COUNT,
				allowedUnits = { 'Q1238720' },
				max = 1,
			},
		},
	},
	{
		name = 'series',
		property = P_OF_SERIES,
		max = 1,
		qualifiers = {
			{
				name = 'seriesIssue',
				property = P_ISSUE,
				max = 1,
			},
		},
	},
}

-- Field descriptors for a topic item. p.fetch passes this list as the `get`
-- of the 'topic' entry. The 'id' descriptor names its property indirectly via
-- `propertyPath = { 'idType', 'entity' }` instead of a fixed `property`.
-- NOTE(review): presumably the path is resolved against `source`, i.e. the
-- 'idType' field fetched by publishedInMap — confirm in WDBackend.
local topicMap = {
	{
		name = 'id',
		propertyPath = { 'idType', 'entity' },
		max = 1,
		-- Citation details read from the id statement's qualifiers.
		qualifiers = {
			{
				name = 'title',
				property = P_NAMED_AS,
				max = 1,
			},
			{
				name = 'date',
				property = P_DATE,
				max = 1,
			},
			{
				name = 'archiveUrl',
				property = P_ARCHIVE_URL,
				max = 1,
			},
			{
				name = 'archiveDate',
				property = P_ARCHIVE_DATE,
				max = 1,
			},
			{
				name = 'authors',
				property = P_AUTHORS_AS_STR,
				isArray = true,
			},
		},
	},
	{
		-- if P1810 id qualifier is not specified
		name = 'title',
		getValue = wikidata.name,
	},
}

-- Descriptors for an alternative URL source. p.fetch passes this list as the
-- `get` of the second 'workVersion' entry, after all other maps. It reads
-- P_GOOGLE_BOOKS_ID into 'id' and the P_URL_MASK of that property entity into
-- 'urlMask', flagged `overwrite`. fetchUrl combines `source.id` and
-- `source.urlMask` into a URL.
local alternativeUrl = {
	{
		name = 'id',
		property = P_GOOGLE_BOOKS_ID,
		max = 1,
		get = {
			{
				name = 'urlMask',
				entity = P_GOOGLE_BOOKS_ID,
				property = P_URL_MASK,
				cache = idCache,
				max = 1,
				overwrite = true,
			},
		},
	},
}

-- Replaces every `$1` placeholder in `mask` with `id` and returns the result.
--
-- A function replacement is used on purpose. In a replacement *string*
-- `string.gsub` treats `%` as an escape character, so an identifier that
-- contains `%` (for example a percent-encoded `%2F`) would be mangled or raise
-- "invalid capture index". A function's return value is inserted verbatim.
local function fillUrlMask(mask, id)
	local url = mask:gsub('%$1', function()
		return id
	end)
	return url
end

-- Makes sure `source.url` is set when a usable URL can be derived.
--
-- * An existing `source.url` is kept, unless `source.urlStatus.entity` is
--   'Q1193907'; in that case it is discarded and rebuilt from identifiers.
-- * Otherwise the URL is built from `source.id` and `source.urlMask`, or,
--   failing that, from `source.pmc` and its `components.urlMask`.
-- * If none of these apply, `source.url` is left unset.
--
-- @param f       backend object; only `f:safeField(source, name)` is used
--                (presumably it returns the named field or an empty table —
--                confirm in WDBackend)
-- @param source  table that is modified in place
local function fetchUrl(f, source)
	if source.url then
		if source.urlStatus and source.urlStatus.entity == 'Q1193907' then
			source.url = nil
		else
			return
		end
	end

	local idTable = f:safeField(source, 'id')
	if idTable.value and source.urlMask then
		source.url = { value = fillUrlMask(source.urlMask.value, idTable.value) }
		return
	end

	local pmcTable = f:safeField(source, 'pmc')
	if pmcTable.value and pmcTable.components and pmcTable.components.urlMask then
		source.url = {
			value = fillUrlMask(pmcTable.components.urlMask.value, pmcTable.value)
		}
		return
	end
end

-- Drops `source.publishedInUrl` when `source.publishedInUrlStatus.entity` is
-- 'Q1193907'. Nothing else is touched.
--
-- @param f       backend object (not used by this function)
-- @param source  table that is modified in place
local function fetchPublishedInUrl(f, source)
	if not source.publishedInUrl then
		return
	end

	local status = source.publishedInUrlStatus
	if status and status.entity == 'Q1193907' then
		source.publishedInUrl = nil
	end
end

-- Returns the language code stored in `source.langCode`.
-- The field may hold the code directly or a table with the code in `.value`;
-- a missing field yields nil.
local function getLangCode(source)
	local code = source.langCode
	if type(code) ~= 'table' then
		return code
	end
	return code.value
end

-- Determines the language of `source` and records it in `source.lang`,
-- `source.langCode` and `f.lang`.
--
-- Steps, in order:
--   1. If `source.langCode` is not set, ask the backend for the language of
--      'workVersion' and of the item it is published in (P_PUBLISHED_IN).
--   2. Always ask for the language of 'publishedIn' into `publishedInLang` /
--      `publishedInLangCode`, with a P_SUBCLASS_OF lookup listed as `elseGet`.
--   3. If `source.langCode` is still unset, copy the publishedIn values.
--   4. If it is still unset, call `f:ensureLang()` and use `f.lang`;
--      otherwise set `f.lang` from the source.
--   5. If `source.lang` is unset, derive it via `wdLang.langEntity(f.lang)`.
--   6. Reset `source.publishedIn` to nil if it was nil on entry.
--
-- NOTE(review): `f:fetch` presumably writes the requested fields into `source`
-- (the code below reads them back from there) — confirm in WDBackend.
--
-- @param f       backend object (uses `fetch`, `ensureLang`, `lang`)
-- @param source  table that is modified in place
local function fetchLang(f, source)
	-- Remember the caller-supplied value so it can be restored in step 6.
	local publishedInTable = source.publishedIn

	-- Descriptor for "language and its code"; the same table is referenced
	-- twice in the first fetch below.
	local langMapItem = {
		name = 'lang',
		property = P_WORK_LANG,
		get = {
			{
				name = 'langCode',
				property = P_LANG_CODE,
				cache = langCache,
				max = 1,
			},
		},
	}
	-- Step 1: language of the item itself, then of the publication it is in.
	if not source.langCode then
		f:fetch(source, {
			{
				name = 'workVersion',
				get = {
					langMapItem,
					{
						name = 'publishedIn',
						property = P_PUBLISHED_IN,
						getValue = wikidata.name,
						max = 1,
						get = {
							langMapItem,
						},
					},
				},
			},
		})
	end

	-- Step 2: language of the containing publication, kept in separate fields.
	f:fetch(source, {
		{
			name = 'publishedIn',
			get = {
				{
					name = 'publishedInLang',
					property = P_WORK_LANG,
					max = 1,
					get = {
						{
							name = 'publishedInLangCode',
							property = P_LANG_CODE,
							cache = langCache,
							max = 1,
							elseGet = {
								-- for ethnolects determine parent language
								{
									name = 'publishedInLang',
									property = P_SUBCLASS_OF,
									overwrite = true,
									max = 1,
									get = {
										{
											name = 'publishedInLangCode',
											property = P_LANG_CODE,
											cache = langCache,
											overwrite = true,
											max = 1,
										},
									},
								},
							},
						},
					},
				},
			},
		},
	})

	-- Step 3: fall back to the containing publication's language.
	if not source.langCode then
		if source.publishedInLang then
			source.lang = source.publishedInLang
		end
		if source.publishedInLangCode then
			source.langCode = source.publishedInLangCode
		end
	end
	
	-- Step 4: keep `source.langCode` and `f.lang` in agreement.
	if not source.langCode then
		f:ensureLang()
		source.langCode = { value = f.lang }
	else
		f.lang = getLangCode(source)
	end
	-- Step 5: derive the language entity when only the code is known.
	if not source.lang then
		source.lang = {
			entity = wdLang.langEntity(f.lang),
		}
	end
	
	-- publishedIn field need to be empty to get its qualifiers later
	if publishedInTable == nil then
		source.publishedIn = nil
	end
end

-- Removes from `source.detectedInfo` every entry whose `entity` equals the
-- `entity` of some entry in `source.info`, so the same entity is not listed
-- in both arrays. `source.info` itself is never modified.
--
-- Does nothing when either array is missing.
--
-- @param source  table that is modified in place
local function removeInfoDuplicates(source)
	local info = source.info
	local detected = source.detectedInfo
	if not info or not detected then
		return
	end

	for _, infoItem in ipairs(info) do
		-- Walk backwards: table.remove shifts the following elements down, so
		-- a forward loop would skip the element right after each removal.
		for i = #detected, 1, -1 do
			if detected[i].entity == infoItem.entity then
				table.remove(detected, i)
			end
		end
	end
end

-- Public entry point: fills `source` with data obtained through the backend
-- and returns the same table.
--
-- The language is resolved first (fetchLang), then the field maps are
-- fetched, the title is tagged with the 'workVersion' entity, entries
-- duplicated between 'info' and 'detectedInfo' are pruned, and the URL
-- fields are finalized.
--
-- @param source  table describing the citation; modified in place
-- @return the same `source` table
function p.fetch(source)
	local f = backend.new(getLangCode(source))
	fetchLang(f, source)
	f:assertLang()

	-- 'workVersion' is listed twice on purpose: alternativeUrl is requested
	-- only after every other map.
	local fetchPlan = {
		{ name = 'workVersion', get = workVersionMap },
		{ name = 'publishedIn', get = publishedInMap },
		{ name = 'topic', get = topicMap },
		{ name = 'work', get = workMap },
		{ name = 'workVersion', get = alternativeUrl },
	}
	f:fetch(source, fetchPlan)

	-- A title that has a value is attributed to the 'workVersion' entity.
	local title = f:safeField(source, 'title')
	if title.value then
		title.entity = f:safeField(source, 'workVersion').entity
	end

	removeInfoDuplicates(source)
	fetchUrl(f, source)
	fetchPublishedInUrl(f, source)

	return source
end

return p