模組:PrevalenceData
呢個模組已經被評為beta版,可以廣泛用。因為佢新近完成,請慎用,以確保輸出結果符合預期。 |
名
編輯PrevalenceData——顯示已知發病率
簡述
編輯{{#invoke:PrevalenceData | main | qId= 維基數據號碼 }}
p.test()
用途
編輯模組主入口 main 收一個參數 qId,係某種病嘅維基數據號碼(Q 字頭數字),輸出對應病嘅已知發病率(P1193)。唔畀參數輸出空字串;如果發病率係限於某啲情況(即係所謂修飾子[暫譯],qualifier,目前主要係睇國家或者地區),會用括弧括住。如果有出處,會 「盡人事」 畀埋出處(維基數據嘅出處好多時寫得好求其,只能夠 「盡人事」,畀出處亦係呢度原創功能,比起主模組測試量更少)。
所有不明參數全部忽略,唔會出錯誤訊息;串錯 qId(包括錯大細楷)當不明參數,出空字串。
範例
編輯- 打:
{{#invoke:PrevalenceData|main|qId=Q190564}}
- 出:0.0123%(英國)
上面例子嘅出處,可以睇到維基數據寫出處有幾求其(最後一個睇落咁正常,係因為佢係維基數據嘅正宗引書方法,其他基本上係引網):
- ↑ PubMed publication ID 19692116。https://pubmed.ncbi.nlm.nih.gov/19692116
- ↑ https://doi.org/10.1007%2Fs11894-017-0558-9
- ↑ https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4586575
- ↑ 4.0 4.1 Rebbeca A Grysiewicz、Kurian Thomas、Dilip K Pandey(2008年11月1號)。“Epidemiology of ischemic and hemorrhagic stroke: incidence, prevalence, mortality, and risk factors.”Neurologic Clinics 第26卷4期。doi:10.1016/J.NCL.2008.07.003。
注意
編輯參數名係 qId(中間大楷),唔係 qid。
參數 qId 畀咗就一定要有嘢,唔可以空白,空白會產生系統錯誤,號碼唔存在亦會產生系統錯誤。
測試
編輯呢度嘅版本可以用 test 函數做有限度測試,喺 Debug console 輸入 p.test()
,測試成功冇輸出,失敗出錯誤訊息。礙於呢個模組用維基數據,測試失敗可能只係維基數據更新咗,遇到失敗嘅情況要睇係邊行,睇返 check 緊邊項維基數據,再判斷係模組出問題定數據變咗。
test 入面嘅測試全部都係粵語維基百科呢便自己做,同原作者無關。
人手睇模組輸出(例如搵緊點解測試失敗,或者整新 test case)可以用 print(p.main({args={qId='維基數據號碼'}}))
。
來源
編輯呢個模組應該係來自英文維基百科,2016年推出,2024年引入,有本地化同本地有做一啲修正,但係冇加入英維喺2019年後做嘅改動。
同原版(包括目前嘅英維版)主要唔同:
- 除咗國家同地區之外,有一兩個同地區無關嘅修飾子都會處理(因為例如有啲病男女發病率唔同,唔寫明好誤導)
- 地區會睇埋 P131(極之常見嘅 「位於政區」)
- 遇到明顯入錯嘅數據(負數或者大過 100%)會自動抌咗佢
- 有啲情況會加文章入追蹤類,方便搵問題
- 如果維基數據有記出處,會 「盡人事」 畀埋出處
睇埋
編輯- Category:P1193 同時出現有括弧同冇括弧數值
- Category:P1193 多過一個冇括弧數值
- Category:P1193 維基數據遇到未知修飾子
- Category:P1193 維基數據遇到無效數值
上面嘅解係穿透包含咗自模組:PrevalenceData/doc。 (改 | 史) 編者可以響呢個模組嘅沙盤 (開 | 鏡)同埋試例 (改)版度試驗佢。 呢個模組嘅細版。 |
-- This module is specifically for the Wikidata property "prevalence" (P1193), due
-- to its particular need for ranges and area-based qualifiers, and the lack of
-- support for these in the main Wikidata module.
-- Partially tested by Cantonese Wikipedia and fairly heavily modified. See testcases subpage.
require 'strict'
local p = {}
local label_to = '-';
local comma = '、';
local parenleft = '(';
local parenright = ')';
local overrides = {
-- Certain qualifiers might need a
-- manual-ish override for labels.
-- [ 132453 ] = 'developed world'
[ 1156970 ] = '全人類'
};
local cat_cite_as_stated_in = '[[Category:P1193 引咗書]]'
local cat_cite_web = '[[Category:P1193 引咗網]]'
local cat_conflicting_qualifiers = '[[Category:P1193 維基數據遇到多過一個同類修飾子]]'
local cat_more_than_one_unannotated_value = '[[Category:P1193 多過一個冇括弧數值]]'
local cat_unannotated_value_in_annotated_list = '[[Category:P1193 同時出現有括弧同冇括弧數值]]'
local cat_unknown_qualifier = '[[Category:P1193 維基數據遇到未知修飾子]]'
local cat_value_out_of_range = '[[Category:P1193 維基數據遇到無效數值]]'
-------------------------------------------------------------------------------
-- Proof-of-concept to see how feasible this is
-- This function is completely untested (as in no test cases exist)
--
-- Wikidata references are completely unlike Wikipedia references. For example,
-- web cites never have titles or authors, and often have no access dates
-- (only language is required, but even this is often missing). Sometimes
-- books have no publication date or no language. It's a complete mess.
local z = require('模組:書名')
local function render_title ( title, work )
local it
if z.cjk_p(title) or (work and z.cjk_p(work)) then
it = z.build_type_2_citable(work, title);
elseif work then
it = '“' .. title .. "”. ''" .. work .. "''"
else
it = "''" .. title .. "''"
end
return it
end
local function render_date ( d )
local it
local yyyy, mm, dd = d.time:match('^%+(%d%d%d%d)%-(%d%d)%-(%d%d)');
if yyyy then
it = yyyy .. '年' .. (mm + 0) .. '月' .. (dd + 0) .. '號'
end
return it
end
local function citeitem ( qid )
local it = ''
local cited = mw.wikibase.getEntity(qid)
local title, date, doi, url, work, volume, number, pages, journal_mode_p
local authors = {}
local items = {}
title = mw.wikibase.label(qid)
if cited.claims.P304 then -- pages
pages = cited.claims.P304[1].mainsnak.datavalue.value
end
if cited.claims.P356 then -- DOI
doi = cited.claims.P356[1].mainsnak.datavalue.value
url = 'https://doi.org/' .. doi
end
if cited.claims.P433 then -- number (issue)
number = cited.claims.P433[1].mainsnak.datavalue.value
end
if cited.claims.P478 then -- volume
volume = cited.claims.P478[1].mainsnak.datavalue.value
end
if cited.claims.P577 then -- publication date
date = cited.claims.P577[1].mainsnak.datavalue.value
end
if cited.claims.P1433 then -- published in
work = mw.wikibase.label(cited.claims.P1433[1].mainsnak.datavalue.value.id)
end
if not url and cited.claims.P856 then -- LAST: official website
url = cited.claims.P856[1].mainsnak.datavalue.value
end
-- build it
journal_mode_p = number
if cited.claims.P2093 then -- author name string
for k, v in pairs(cited.claims.P2093) do
table.insert(authors, v.mainsnak.datavalue.value)
end
table.insert(items, table.concat(authors, '、'))
if journal_mode_p and date then
items[#items] = items[#items] .. '(' .. render_date(date) .. ')'
end
end
if date and not journal_mode_p then
table.insert(items, render_date(date))
end
if title then
if url then
local t = '[' .. url .. ' ' .. render_title(title, work) .. ']'
t = t:gsub('”%.', '%.”'):gsub("^(%[%S+ .-) (''.-'')(%])$", '%1%3%2')
table.insert(items, t)
else
table.insert(items, render_title(title, work))
end
if volume and number then
items[#items] = items[#items] .. ' 第' .. volume .. '卷' .. number .. '期'
elseif volume then
items[#items] = items[#items] .. ' 第' .. volume .. '卷'
elseif number then
items[#items] = items[#items] .. ' 第' .. number .. '號'
end
end
if doi then
table.insert(items, 'doi:' .. doi)
elseif url then
table.insert(items, url)
end
it = table.concat(items, '。') .. '。'
return it
end
local function render_references ( refs, frame )
local it = ''
local nodes = {}
local cats = {}
for _, ref in pairs(refs) do
local node, title, id, id_owner, isbn, url, access_date, wikipedia_p
if ref.snaks.P248 then -- Stated in。陣列(??),可能係 PubMed、人口普查、書、其他印刷品等等
-- XXX ref.snaks.P248 係陣列,理論上可能多過一個數值,暫時求其用第一個
node = citeitem(ref.snaks.P248[1].datavalue.value.id)
cats[cat_cite_as_stated_in] = true
elseif ref.snaks.P854 then -- 參考網址
url = ref.snaks.P854[1].datavalue.value
cats[cat_cite_web] = true
node = ''
end
if title and not node then
node = render_title(title)
end
for key, val in pairs(ref.snaks) do
-- XXX val 係陣列,理論上可能多過一個數值,暫時求其用第一個
if key == 'P143' then
wikipedia_p = true -- but might be false in reality
elseif key == 'P248' then
--
elseif key == 'P813' then
access_date = val[1].datavalue.value
elseif val[1].datatype == 'external-id' then
id = val[1].datavalue.value
id_owner = mw.wikibase.label(key)
local other = mw.wikibase.getEntity(key)
if other and other.claims and other.claims.P1630 then -- formatter URL
-- XXX other.claims.P1630 係陣列,理論上可能多過一個數值,暫時求其用第一個
url = other.claims.P1630[1].mainsnak.datavalue.value:gsub('%$1', val[1].datavalue.value)
if not cats[cat_cite_as_stated_in] then
cats[cat_cite_web] = true
end
end
end
end
if not node and not wikipedia_p then
if id then
node = id_owner .. ' ' .. id .. '。'
else
node = z.cvs(ref) -- 暫時槷位
end
end
if node then
if url then
node = node .. url
if access_date then
local t = render_date(access_date)
if t then
node = node .. ',喺' .. t .. '搵到。'
end
end
end
-- node = node .. '<!-- ' .. z.cvs(ref) .. ' -->'
if #node > 0 then
table.insert(nodes, node)
end
end
end
if #nodes > 0 then
for _, node in pairs(nodes) do
local id = 'g-' .. mw.hash.hashValue('md5', node)
if frame.extensionTag then -- real production environment
it = it .. frame:extensionTag('ref', node, { name = id })
else -- debug/test
it = it .. '<ref name="' .. id .. '">' .. node .. '</ref>'
end
end
end
return it, cats
end
-------------------------------------------------------------------------------
local function valid_scaled_probability_p ( probability )
return probability >= 0 and probability <= 100
end
local function annotate_qual ( quals )
local pRange = ''
for k, qual in pairs(quals) do
if k > 1 then
pRange = pRange .. comma
end
local qualId = qual.datavalue.value[ 'numeric-id' ]
local link = mw.wikibase.sitelink( 'Q' .. qualId )
local label = overrides[qualId] or mw.wikibase.label( 'Q' .. qualId )
if link then
if link ~= label then
label = '[[' .. link .. '|' .. label .. ']]'
else
label = '[[' .. label .. ']]'
end
end
pRange = pRange .. label
end
return pRange
end
p.main = function ( frame )
--local entity = mw.wikibase.getEntity( 'Q133087' )
local entity = mw.wikibase.getEntity( frame.args.qId or nil )
if entity then
local prevalenceClaims = entity.claims and entity.claims.P1193
-- TODO: Get best claim, not always just the first one.
-- Probably use getBestStatements.
if prevalenceClaims then
local it = {}
local cats = {}
local number_of_annotated_values = 0
local number_of_unannotated_values = 0
-- Run through all prevalence claims
for i, prevalenceClaim in pairs( prevalenceClaims ) do
local segment = {}
local prevalenceValue = prevalenceClaim.mainsnak.datavalue.value
if prevalenceValue then
local pRange
if prevalenceValue.lowerBound and prevalenceValue.upperBound then
local lowerBound = prevalenceValue.lowerBound * 100
local upperBound = prevalenceValue.upperBound * 100
if valid_scaled_probability_p(lowerBound) and valid_scaled_probability_p(upperBound) then
pRange = lowerBound
if lowerBound ~= upperBound then
pRange = pRange .. label_to .. upperBound
end
else
cats[cat_value_out_of_range] = true
end
else
local value = prevalenceValue.amount * 100
if valid_scaled_probability_p(value) then
pRange = value
else
cats[cat_value_out_of_range] = true
end
end
if pRange then
pRange = pRange .. '%'
if prevalenceClaim.qualifiers then
-- Qualifiers for prevalence are currently unstandardized.
-- Keep guessing until the right one is found.
-- Actually go through every key, for tracking purposes
local quals, quals2
for k, v in pairs(prevalenceClaim.qualifiers) do
if k == 'P276' or -- location
k == 'P1001' or -- applies to jurisdiction
k == 'P131' or -- 位於政區
k == 'P17' then -- country
if quals then
cats[cat_conflicting_qualifiers] = true
else
quals = v
end
elseif k == 'P6001' or -- applies to people
k == 'P642' then -- 之於
if quals2 then
cats[cat_conflicting_qualifiers] = true
else
quals2 = v
end
else
cats[cat_unknown_qualifier] = true
end
end
if quals and quals2 then
pRange = pRange .. parenleft .. annotate_qual(quals) .. '嘅' .. annotate_qual(quals2) .. parenright
number_of_annotated_values = number_of_annotated_values + 1
elseif quals then
pRange = pRange .. parenleft .. annotate_qual(quals) .. parenright
number_of_annotated_values = number_of_annotated_values + 1
elseif quals2 then
pRange = pRange .. parenleft .. annotate_qual(quals2) .. parenright
number_of_annotated_values = number_of_annotated_values + 1
else
number_of_unannotated_values = number_of_unannotated_values + 1
end
else
number_of_unannotated_values = number_of_unannotated_values + 1
end
table.insert(segment, pRange)
end
end
local rendered_segment = table.concat(segment, comma)
if prevalenceClaim.references then
local result, new_cats = render_references(prevalenceClaim.references, frame)
rendered_segment = rendered_segment .. result
for k, v in pairs(new_cats) do
cats[k] = v
end
end
if #rendered_segment > 0 then
table.insert(it, rendered_segment)
end
end
-- sanity check - output as if nothing happened but track the problems
if number_of_unannotated_values > 1 then
cats[cat_more_than_one_unannotated_value] = true
end
if number_of_annotated_values > 0 and number_of_unannotated_values > 0 then
cats[cat_unannotated_value_in_annotated_list] = true
end
local it = table.concat(it, comma)
for k, v in pairs(cats) do
it = it .. k
end
return it
end
end
return ''
end
p.test = function ()
local q = require('模組:PrevalenceData/testcases');
return q.test(p);
end
p.citeitem = citeitem
return p
-- vi: set sw=4 ts=4 ai sm: