From c1aa59a5da0b59dab98d73d1b4a343870e13e3a1 Mon Sep 17 00:00:00 2001 From: hooke007 Date: Thu, 11 May 2023 22:00:32 +0100 Subject: [PATCH] =?UTF-8?q?=E5=90=8C=E6=AD=A5=E4=B8=8E=E6=95=B4=E5=90=88?= =?UTF-8?q?=E4=B8=8A=E6=B8=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 核心: mpv 缓存选项细化 input_easy 快捷键中的个别数值修改为浮点 脚本: osc_plus 的局部渲染优化 thumbfast 修正预览精度,增加预览质量的选择 uosc 启用空闲界面;翻译词条增补修正;修复一个原因未知的崩溃 着色器: nlmeans_hqx 取代 nlmeans_hq;增加 nlmeans_2x --- portable_config/input_easy.conf | 20 +- portable_config/mpv.conf | 9 +- portable_config/script-opts.conf | 45 +- portable_config/script-opts/console.conf | 9 +- portable_config/script-opts/thumbfast.conf | 8 +- portable_config/script-opts/uosc.conf | 3 + portable_config/script-opts/uosc_lang.conf | 70 +- portable_config/script-opts/ytdl_hook.conf | 2 +- portable_config/scripts/osc_plus.lua | 67 +- portable_config/scripts/thumbfast.lua | 375 ++- .../scripts/uosc/elements/Logo.lua | 61 + .../scripts/uosc/elements/Menu.lua | 4 +- .../scripts/uosc/elements/Timeline.lua | 2 +- .../scripts/uosc/elements/TopBar.lua | 4 + portable_config/scripts/uosc/lib/ass.lua | 2 +- portable_config/scripts/uosc/lib/lang.lua | 72 +- portable_config/scripts/uosc/lib/menus.lua | 10 +- portable_config/scripts/uosc/lib/text.lua | 4 +- portable_config/scripts/uosc/lib/utils.lua | 2 +- portable_config/scripts/uosc/main.lua | 103 +- portable_config/shaders/guided.glsl | 11 +- portable_config/shaders/guided_lgc.glsl | 7 +- portable_config/shaders/guided_s.glsl | 2 +- portable_config/shaders/nlmeans.glsl | 680 +++--- portable_config/shaders/nlmeans_2x.glsl | 1247 ++++++++++ portable_config/shaders/nlmeans_hq.glsl | 2161 ----------------- portable_config/shaders/nlmeans_hqx.glsl | 1288 ++++++++++ portable_config/shaders/nlmeans_lgc.glsl | 671 ++--- portable_config/shaders/nlmeans_lq.glsl | 674 ++--- portable_config/shaders/nlmeans_temporal.glsl | 701 +++--- 30 files changed, 4650 insertions(+), 3664 deletions(-) 
create mode 100644 portable_config/scripts/uosc/elements/Logo.lua create mode 100644 portable_config/shaders/nlmeans_2x.glsl delete mode 100644 portable_config/shaders/nlmeans_hq.glsl create mode 100644 portable_config/shaders/nlmeans_hqx.glsl diff --git a/portable_config/input_easy.conf b/portable_config/input_easy.conf index d5acb2df..9db0dc69 100644 --- a/portable_config/input_easy.conf +++ b/portable_config/input_easy.conf @@ -32,16 +32,16 @@ o show-progress # 临时显示时间码/进度条 O no-osd cycle-values osd-level 3 1 # 常驻显示时间码 - 1 add contrast -1 # 对比度 - - 2 add contrast 1 # 对比度 + - 3 add brightness -1 # 明度 - - 4 add brightness 1 # 明度 + - 5 add gamma -1 # 伽马 - - 6 add gamma 1 # 伽马 + - 7 add saturation -1 # 饱和度/纯度 - - 8 add saturation 1 # 饱和度/纯度 + - 9 add hue -1 # 色相 - - 0 add hue 1 # 色相 + + 1 add contrast -0.5 # 对比度 - + 2 add contrast 0.5 # 对比度 + + 3 add brightness -0.5 # 明度 - + 4 add brightness 0.5 # 明度 + + 5 add gamma -0.5 # 伽马 - + 6 add gamma 0.5 # 伽马 + + 7 add saturation -0.5 # 饱和度/纯度 - + 8 add saturation 0.5 # 饱和度/纯度 + + 9 add hue -0.5 # 色相 - + 0 add hue 0.5 # 色相 + Ctrl+BS set contrast 0 ; set brightness 0 ; set gamma 0 ; set saturation 0 ; set hue 0 # 重置(视频)均衡器 ##当输出旋转之后,垂直方向因跟随输出方向的变化而改变。输出旋转操作会启动缩略图重建进程 diff --git a/portable_config/mpv.conf b/portable_config/mpv.conf index 3eceb65b..85699d72 100644 --- a/portable_config/mpv.conf +++ b/portable_config/mpv.conf @@ -83,11 +83,14 @@ ##⇘⇘缓存相关 demuxer-max-bytes = 150MiB # 播放网络视频时的向后缓存大小(KiB或MiB),默认 150MiB - icc-cache-dir = - # 指定目录存储和加载从ICC配置文件创建的3dlut缓存(例值 "~~/_cache/icc"),默认为空(内存) + icc-cache = no # 是否在本地存储ICC配置文件的3dlut缓存,默认 no # 可以用来加快加载速度,未压缩的LUT的大小取决于 --icc-3dlut-size + icc-cache-dir = + # 指定ICC配置文件的3dlut缓存目录(例值 "~~/_cache/icc"),WIN平台默认为主设置路径 + + gpu-shader-cache = no # 是否在本地存储GLSL着色器的编译缓存,可以提高启动性能,默认 no gpu-shader-cache-dir = - # 在此目录中存储和加载已编译的GLSL着色器缓存,可以提高启动性能(例值 "~~/_cache/shader"),默认为空(内存) + # 指定GLSL着色器的编译缓存目录(例值 "~~/_cache/shader"),WIN平台默认为主设置路径 watch-later-directory = # 稍后观看功能的缓存目录,其中的文件记录 
--watch-later-options 指定的项。默认为空(实际路径为 "~~/watch_later") diff --git a/portable_config/script-opts.conf b/portable_config/script-opts.conf index 2bcd88a0..7a5d4344 100644 --- a/portable_config/script-opts.conf +++ b/portable_config/script-opts.conf @@ -10,9 +10,12 @@ # 控制台 # ########## - script-opts-append = console-scale=1 # 字体缩放。当 --hidpi-window-scale=no 时,不再考量显示缩放率 - script-opts-append = console-font= # 指定控制台的字体 - script-opts-append = console-font_size=24 # 字体大小默认16。最终大小将与缩放率相乘 + script-opts-append = console-scale=1 # 字体缩放(当 --hidpi-window-scale=no 时,不再考量DPI的影响),默认 1 + script-opts-append = console-font= # 指定控制台的字体,默认为空 + script-opts-append = console-font_size=16 # 字体大小,默认 16(最终大小将与前项之中的 scale 相乘) + script-opts-append = console-border_size=1 # 字体边框宽度,默认 1 + + script-opts-append = console-history_dedup=yes # 移除历史记录中的重复条目,只保留最新的一项,默认 yes @@ -149,7 +152,7 @@ script-opts-append = ytdl_hook-try_ytdl_first=no # 首选尝试用YTDL解析(默认 no) - script-opts-append = ytdl_hook-exclude="%.avi$|%.flac$|%.flv$|%.mp3$|%.m3u$|%.m3u8$|%.m4a$|%.m4v$|%.mkv$|%.mp4$|%.ts$|%.VOB$|%.wav$|%.webm$|%.wmw$" + script-opts-append = ytdl_hook-exclude="%.avi$|%.flac$|%.flv$|%.mp3$|%.m3u$|%.m3u8$|%.m4a$|%.m4v$|%.mkv$|%.mp4$|%.ts$|%.VOB$|%.wav$|%.webm$|%.wmv$" # 解析地址黑名单,格式解释见 https://mpv.io/manual/master/#options-exclude # 推荐在 try_ytdl_first=yes 的情况下使用,可合理加速部分地址的解析 script-opts-append = ytdl_hook-all_formats=no # 默认 no https://mpv.io/manual/master/#options-all-formats @@ -425,26 +428,28 @@ ################ script-opts-append = thumbfast-socket= - # Socket 路径,留空即自动 + # Socket 路径,留空即自动 script-opts-append = thumbfast-tnpath= - # 缩略图缓存路径,留空即自动 - script-opts-append = thumbfast-max_height=300 # 缩略图的尺寸,以像素为单位,默认 300 300 + # 缩略图缓存路径,留空即自动 + script-opts-append = thumbfast-max_height=300 # 缩略图的尺寸,以像素为单位,默认 300 300 script-opts-append = thumbfast-max_width=300 - script-opts-append = thumbfast-overlay_id=42 # 勿改 + script-opts-append = thumbfast-overlay_id=42 # 勿改 - script-opts-append = thumbfast-spawn_first=no # 
加载文件时就开始生成缩略图,默认 no - script-opts-append = thumbfast-network=no # 是否对流媒体启用,默认 no - script-opts-append = thumbfast-audio=no # 是否对音频文件启用,默认 no - script-opts-append = thumbfast-hwdec=yes # 是否使用硬解加速,默认 yes - script-opts-append = thumbfast-direct_io=yes # [仅Windows且LuaJIT] 使用Windows的原生API来写入pipe。默认 yes + script-opts-append = thumbfast-spawn_first=no # 加载文件时就开始生成缩略图,默认 no + script-opts-append = thumbfast-quit_after_inactivity=0 # 是否退出超时未活动的缩略图进程(秒),默认 0 即禁用 + script-opts-append = thumbfast-network=no # 是否对流媒体启用,默认 no + script-opts-append = thumbfast-audio=no # 是否对音频文件启用,默认 no + script-opts-append = thumbfast-hwdec=yes # 是否使用硬解加速,默认 yes + script-opts-append = thumbfast-direct_io=yes # [仅Windows且LuaJIT] 使用Windows的原生API来写入pipe。默认 yes - script-opts-append = thumbfast-sw_threads=2 # 软解线程数,默认 2 - script-opts-append = thumbfast-binpath=mpv # 自定义mpv路径,Mac使用bundle-app用户可选值为bundle,默认 mpv - script-opts-append = thumbfast-min_duration=0 # 是否只对时长高于该值的视频启用(秒),默认 0 即禁用 - script-opts-append = thumbfast-precise=auto # <默认auto|yes|no> 启用高精度预览,yes即始终精确帧,no即始终关键帧,默认即仅光标静止时为精确帧 - script-opts-append = thumbfast-frequency=0.1 # 解码频率(秒),默认 0.1 - script-opts-append = thumbfast-auto_run=yes # 自动运行,默认 yes + script-opts-append = thumbfast-sw_threads=2 # 软解线程数,默认 2 + script-opts-append = thumbfast-binpath=mpv # 自定义mpv路径,Mac使用bundle-app用户可选值为bundle,默认 mpv + script-opts-append = thumbfast-min_duration=0 # 是否只对时长高于该值的视频启用(秒),默认 0 即禁用 + script-opts-append = thumbfast-precise=0 # <默认0|1|2> 预览精度。0 为自动(仅光标静止时为精确帧),1 为始终关键帧,2 为始终精确帧 + script-opts-append = thumbfast-quality=1 # <0|默认1|2> 预览质量。0 为自动,1 为不映射hdr,2 为减少锯齿并支持将一般hdr源映射到sdr + script-opts-append = thumbfast-frequency=0.1 # 解码频率(秒),默认 0.1 + script-opts-append = thumbfast-auto_run=yes # 自动运行,默认 yes @@ -562,5 +567,7 @@ script-opts-append = uosc-chapter_range_patterns=openings:オープニング;endings:エンディング # 补充额外的lua模式来识别简单章节范围的起始点(除 ads 外的所有章节)。示例即默认值 + script-opts-append = uosc-idlescreen=yes # 空闲是否显示图标,默认 yes + script-opts-append = uosc-idlemsg=default # 
空闲显示的文字信息,默认 default script-opts-append = uosc-idle_call_menu=0 # 空闲自动弹出上下文菜单。设置为 <0.02-2> 之间的数为延迟触发的时间,否则禁用(默认 0) script-opts-append = custom_font=default # 自定义界面字体,默认值 default 即使用 mpv.conf 中 --osd-font 的字体 diff --git a/portable_config/script-opts/console.conf b/portable_config/script-opts/console.conf index 55e9ffe4..99d1c1a3 100644 --- a/portable_config/script-opts/console.conf +++ b/portable_config/script-opts/console.conf @@ -3,9 +3,12 @@ scale=1 ##字体缩放率。当 --hidpi-window-scale=no 时,不再考量显示缩放率 - font= ##指定控制台的字体 +font_size=16 +##字体大小,默认16。最终大小将与 --scale 相乘 +border_size=1 +##字体边框宽度,默认1。 -font_size=24 -##字体大小默认16。最终大小将与 --scale 相乘 +history_dedup=yes +##移除历史记录中的重复条目,只保留最新的一项,默认yes。 diff --git a/portable_config/script-opts/thumbfast.conf b/portable_config/script-opts/thumbfast.conf index b363b6b9..73c73511 100644 --- a/portable_config/script-opts/thumbfast.conf +++ b/portable_config/script-opts/thumbfast.conf @@ -16,6 +16,8 @@ overlay_id=42 # 加载文件时就开始生成缩略图,默认 no spawn_first=no +# 是否退出超时未活动的缩略图进程(秒),默认 0 即禁用 +quit_after_inactivity=0 # 是否对流媒体启用,默认 no network=no # 是否对音频文件启用,默认 no @@ -31,8 +33,10 @@ sw_threads=2 binpath=mpv # 是否只对时长高于该值的视频启用(秒),默认 0 即禁用 min_duration=0 -# <默认auto|yes|no> 启用高精度预览,yes即始终精确帧,no即始终关键帧,默认即仅光标静止时为精确帧 -precise=auto +# <默认0|1|2> 预览精度。0 为自动(仅光标静止时为精确帧),1 为始终关键帧,2 为始终精确帧 +precise=0 +# <0|默认1|2> 预览质量。0 为自动,1 为不映射hdr,2 为减少锯齿并支持将一般hdr源映射到sdr +quality=1 # 解码频率(秒),默认 0.1 frequency=0.1 # 自动运行,默认 yes diff --git a/portable_config/script-opts/uosc.conf b/portable_config/script-opts/uosc.conf index b8536db4..ee3660d9 100644 --- a/portable_config/script-opts/uosc.conf +++ b/portable_config/script-opts/uosc.conf @@ -148,6 +148,9 @@ adjust_osd_margins=no chapter_ranges=openings:30ABF964,endings:30ABF964,ads:C54E4E80 chapter_range_patterns=openings:オープニング;endings:エンディング +# 空闲是否显示logo(默认 yes),和自定义的文字信息(默认值 default 即不显示文字) +idlescreen=yes +idlemsg=default # 空闲自动弹出上下文菜单。设置为 <0.02-2> 之间的数为延迟触发的时间,否则禁用(默认) idle_call_menu=0 # 自定义界面字体,默认值 default 即使用主设置中 --osd-font 
的字体 diff --git a/portable_config/script-opts/uosc_lang.conf b/portable_config/script-opts/uosc_lang.conf index 37f441f3..d153a5d1 100644 --- a/portable_config/script-opts/uosc_lang.conf +++ b/portable_config/script-opts/uosc_lang.conf @@ -1,34 +1,37 @@ ## context menu default -_load=load -_file_browser=file browser -_import_sid=import sid -_navigation=navigation -_playlist=playlist -_edition_list=edition list -_chapter_list=chapter list -_vid_list=vid list -_aid_list=aid list -_sid_list=sid list -_playlist_shuffle=playlist shuffle -_ushot=uscreenshot -_VIDEO=VIDEO -_decoding_api=hwdec cycle -_deband_toggle=deband toggle -_deint_toggle=deint toggle -_icc_toggle=icc auto toggle -_corpts_toggle=correct pts toggle -_TOOLS=TOOLS -_stats_toggle=stats toggle -_console_on=console on -_border_toggle=border toggle -_ontop_toggle=ontop toggle -_audio_device=audio device -_stream_quality=stream quality -_show_file_dir=show file dir -_show_config_dir=show config dir -_stop=stop -_quit=quit +_cm_load=Load +_cm_file_browser=File Browser +_cm_import_sid=Import SID +_cm_navigation=Navigation +_cm_playlist=Playlist +_cm_edition_list=Edition-list +_cm_chapter_list=Chapter-list +_cm_vid_list=VID-list +_cm_aid_list=AID-list +_cm_sid_list=SID-list +_cm_playlist_shuffle=Playlist Shuffle +_cm_ushot=uScreenshot +_cm_video=VIDEO +_cm_decoding_api=hwdec cycle +_cm_deband_toggle=deband toggle +_cm_deint_toggle=deint toggle +_cm_icc_toggle=icc auto toggle +_cm_corpts_toggle=correct pts toggle +_cm_tools=TOOLS +_cm_stats_toggle=stats toggle +_cm_console_on=console on +_cm_border_toggle=border toggle +_cm_ontop_toggle=ontop toggle +_cm_audio_device=audio device +_cm_stream_quality=Stream Quality +_cm_show_file_dir=show file dir +_cm_show_config_dir=show config dir +_cm_stop=Stop +_cm_quit=Quit + +## no_border_title +_border_title=No File ## track_loaders sub_menu _sid_menu=subtitle track @@ -41,22 +44,25 @@ _aid_submenu_title=aid list _vid_submenu_title=vid list _playlist_submenu_title=playlist 
_chapter_list_submenu_title=chapter list +_chapter_list_submenu_item_title=unnamed chapter _edition_list_submenu_title=edition list _edition_list_submenu_item_title=edition _stream_quality_submenu_title=stream quality list _audio_device_submenu_title=audio device list +_audio_device_submenu_item_title=Autoselect device _submenu_import=import _submenu_load_file=load file _submenu_id_disabled=disabled +_submenu_id_hint=channel(s) _submenu_id_forced=forced _submenu_id_default=default _submenu_id_external=external _submenu_id_title=track +_submenu_file_browser_item_hint=driver list +_submenu_file_browser_item_hint2=parent dir +_submenu_file_browser_item2_hint=driver _submenu_file_browser_title=driver list -_submenu_file_browser_item_title=parent dir -_submenu_file_browser_item2_title=driver -_submenu_file_browser_item3_title=driver list ## built-in_shortcut _button01=MENU diff --git a/portable_config/script-opts/ytdl_hook.conf b/portable_config/script-opts/ytdl_hook.conf index 5893731e..9b60db3e 100644 --- a/portable_config/script-opts/ytdl_hook.conf +++ b/portable_config/script-opts/ytdl_hook.conf @@ -6,7 +6,7 @@ try_ytdl_first=no ##解析地址黑名单,格式解释见 https://mpv.io/manual/master/#options-exclude ##推荐在 try_ytdl_first=yes 的情况下使用,可合理加速网络地址的解析 -exclude="%.avi$|%.flac$|%.flv$|%.mp3$|%.m3u$|%.m3u8$|%.m4a$|%.m4v$|%.mkv$|%.mp4$|%.ts$|%.VOB$|%.wav$|%.webm$|%.wmw$" +exclude="%.avi$|%.flac$|%.flv$|%.mp3$|%.m3u$|%.m3u8$|%.m4a$|%.m4v$|%.mkv$|%.mp4$|%.ts$|%.VOB$|%.wav$|%.webm$|%.wmv$" ##https://mpv.io/manual/master/#options-all-formats all_formats=no diff --git a/portable_config/scripts/osc_plus.lua b/portable_config/scripts/osc_plus.lua index ad9f0182..9108658b 100644 --- a/portable_config/scripts/osc_plus.lua +++ b/portable_config/scripts/osc_plus.lua @@ -1,10 +1,10 @@ --[[ SOURCE_ https://github.com/mpv-player/mpv/blob/master/player/lua/osc.lua -COMMIT_ 292a5868cb60c481ae9eaed7d21e67dcff41938f +COMMIT_ b7ffe0d16eec8153d9609382997baaf6a29e5e4f 文档_ 
https://github.com/hooke007/MPV_lazy/discussions/18 改进版本的OSC,不兼容其它OSC类脚本(实现全部功能需搭配 新缩略图引擎 thumbfast ) -(可选)mpv.conf的前置条件 --osc=no (否则个别功能不可用,例如 启动时显示OSC) +(可选)mpv.conf的前置条件 --osc=no (否则个别功能可能不可用) 示例在 input.conf 中写入: SHIFT+DEL script-binding osc_plus/visibility # 切换 osc_plus 的可见性 @@ -2408,8 +2408,8 @@ function osc_init() -- mouse move events may pile up during seeking and may still get -- sent when the user is done seeking, so we need to throw away -- identical seeks - thumbfast.pause = false --暂停渲染缩略图 - mp.commandv("script-message-to", "thumbfast", "clear") + thumbfast.pause = false -- 暂停渲染缩略图 + -- mp.commandv("script-message-to", "thumbfast", "clear") -- 会有高几率冻结 local seekto = get_slider_value(element) if (element.state.lastseek == nil) or (not (element.state.lastseek == seekto)) then @@ -2639,6 +2639,12 @@ end function hide_osc() msg.trace("hide_osc") + + -- 关联 thumbfast.lua + if thumbfast.width ~= 0 or thumbfast.height ~= 0 then + mp.commandv("script-message-to", "thumbfast", "clear") + end + if not state.enabled then -- typically hide happens at render() from tick(), but now tick() is -- no-op and won't render again to remove the osc, so do that manually. 
@@ -2800,14 +2806,14 @@ function render() --mouse show/hide area for k,cords in pairs(osc_param.areas["showhide"]) do - set_virt_mouse_area(cords.x1, cords.y1, cords.x2, cords.y2, "showhide") + set_virt_mouse_area(cords.x1, cords.y1, cords.x2, cords.y2, "showhide_osc_plus") end if osc_param.areas["showhide_wc"] then for k,cords in pairs(osc_param.areas["showhide_wc"]) do - set_virt_mouse_area(cords.x1, cords.y1, cords.x2, cords.y2, "showhide_wc") + set_virt_mouse_area(cords.x1, cords.y1, cords.x2, cords.y2, "showhide_wc_osc_plus") end else - set_virt_mouse_area(0, 0, 0, 0, "showhide_wc") + set_virt_mouse_area(0, 0, 0, 0, "showhide_wc_osc_plus") end do_enable_keybindings() @@ -2816,13 +2822,13 @@ function render() for _,cords in ipairs(osc_param.areas["input"]) do if state.osc_visible then -- activate only when OSC is actually visible - set_virt_mouse_area(cords.x1, cords.y1, cords.x2, cords.y2, "input") + set_virt_mouse_area(cords.x1, cords.y1, cords.x2, cords.y2, "input_osc_plus") end if state.osc_visible ~= state.input_enabled then if state.osc_visible then - mp.enable_key_bindings("input") + mp.enable_key_bindings("input_osc_plus") else - mp.disable_key_bindings("input") + mp.disable_key_bindings("input_osc_plus") end state.input_enabled = state.osc_visible end @@ -2835,13 +2841,13 @@ function render() if osc_param.areas["window-controls"] then for _,cords in ipairs(osc_param.areas["window-controls"]) do if state.osc_visible then -- activate only when OSC is actually visible - set_virt_mouse_area(cords.x1, cords.y1, cords.x2, cords.y2, "window-controls") + set_virt_mouse_area(cords.x1, cords.y1, cords.x2, cords.y2, "window-controls_osc_plus") end if state.osc_visible ~= state.windowcontrols_buttons then if state.osc_visible then - mp.enable_key_bindings("window-controls") + mp.enable_key_bindings("window-controls_osc_plus") else - mp.disable_key_bindings("window-controls") + mp.disable_key_bindings("window-controls_osc_plus") end state.windowcontrols_buttons = 
state.osc_visible end @@ -3018,6 +3024,9 @@ function tick() -- render idle message msg.trace("idle message") local _, _, display_aspect = mp.get_osd_size() + if display_aspect == 0 then + return + end local display_h = 360 local display_w = display_h * display_aspect -- logo is rendered at 2^(6-1) = 32 times resolution with size 1800x1800 @@ -3050,8 +3059,8 @@ function tick() set_osd(display_w, display_h, ass.text) if state.showhide_enabled then - mp.disable_key_bindings("showhide") - mp.disable_key_bindings("showhide_wc") + mp.disable_key_bindings("showhide_osc_plus") + mp.disable_key_bindings("showhide_wc_osc_plus") state.showhide_enabled = false end @@ -3086,8 +3095,8 @@ end function do_enable_keybindings() if state.enabled then if not state.showhide_enabled then - mp.enable_key_bindings("showhide", "allow-vo-dragging+allow-hide-cursor") - mp.enable_key_bindings("showhide_wc", "allow-vo-dragging+allow-hide-cursor") + mp.enable_key_bindings("showhide_osc_plus", "allow-vo-dragging+allow-hide-cursor") + mp.enable_key_bindings("showhide_wc_osc_plus", "allow-vo-dragging+allow-hide-cursor") end state.showhide_enabled = true end @@ -3100,8 +3109,8 @@ function enable_osc(enable) else hide_osc() -- acts immediately when state.enabled == false if state.showhide_enabled then - mp.disable_key_bindings("showhide") - mp.disable_key_bindings("showhide_wc") + mp.disable_key_bindings("showhide_osc_plus") + mp.disable_key_bindings("showhide_wc_osc_plus") end state.showhide_enabled = false end @@ -3204,11 +3213,11 @@ end) mp.set_key_bindings({ {"mouse_move", function(e) process_event("mouse_move", nil) end}, {"mouse_leave", mouse_leave}, -}, "showhide", "force") +}, "showhide_osc_plus", "force") mp.set_key_bindings({ {"mouse_move", function(e) process_event("mouse_move", nil) end}, {"mouse_leave", mouse_leave}, -}, "showhide_wc", "force") +}, "showhide_wc_osc_plus", "force") do_enable_keybindings() --mouse input bindings @@ -3227,14 +3236,14 @@ mp.set_key_bindings({ 
{"mbtn_left_dbl", "ignore"}, {"shift+mbtn_left_dbl", "ignore"}, {"mbtn_right_dbl", function(e) process_event("mbtn_right_dbl", "press") end}, -- 右键双击检查 -}, "input", "force") -mp.enable_key_bindings("input") +}, "input_osc_plus", "force") +mp.enable_key_bindings("input_osc_plus") mp.set_key_bindings({ {"mbtn_left", function(e) process_event("mbtn_left", "up") end, function(e) process_event("mbtn_left", "down") end}, -}, "window-controls", "force") -mp.enable_key_bindings("window-controls") +}, "window-controls_osc_plus", "force") +mp.enable_key_bindings("window-controls_osc_plus") function get_hidetimeout() if user_opts.visibility == "always" then @@ -3290,8 +3299,8 @@ function visibility_mode(mode, no_osd) -- Reset the input state on a mode change. The input state will be -- recalculated on the next render cycle, except in 'never' mode where it -- will just stay disabled. - mp.disable_key_bindings("input") - mp.disable_key_bindings("window-controls") + mp.disable_key_bindings("input_osc_plus") + mp.disable_key_bindings("window-controls_osc_plus") state.input_enabled = false update_margins() @@ -3386,5 +3395,5 @@ mp.register_script_message("thumbfast-info", function(json) end end) -set_virt_mouse_area(0, 0, 0, 0, "input") -set_virt_mouse_area(0, 0, 0, 0, "window-controls") +set_virt_mouse_area(0, 0, 0, 0, "input_osc_plus") +set_virt_mouse_area(0, 0, 0, 0, "window-controls_osc_plus") diff --git a/portable_config/scripts/thumbfast.lua b/portable_config/scripts/thumbfast.lua index 77a86f2f..47ace55c 100644 --- a/portable_config/scripts/thumbfast.lua +++ b/portable_config/scripts/thumbfast.lua @@ -1,6 +1,6 @@ --[[ SOURCE_ https://github.com/po5/thumbfast/blob/master/thumbfast.lua -COMMIT_ ddc61957ce38b62283c5d7ef99a7252c7499cc8b +COMMIT_ 8aa6faf10adad899e05cc9b850cde904d37515be 适配多个OSC类脚本的新缩略图引擎 @@ -13,26 +13,28 @@ COMMIT_ ddc61957ce38b62283c5d7ef99a7252c7499cc8b local options = { - socket = "", -- Socket path (leave empty for auto) - tnpath = "", -- 
缩略图缓存路径(确保目录真实存在),留空即自动 + socket = "", -- Socket path (leave empty for auto) + tnpath = "", -- 缩略图缓存路径(确保目录真实存在),留空即自动 - max_height = 300, -- Maximum thumbnail size in pixels (scaled down to fit) Values are scaled when hidpi is enabled + max_height = 300, -- Maximum thumbnail size in pixels (scaled down to fit) Values are scaled when hidpi is enabled max_width = 300, - overlay_id = 42, -- Overlay id + overlay_id = 42, -- Overlay id - spawn_first = false, -- Spawn thumbnailer on file load for faster initial thumbnails - network = false, -- Enable on network playback - audio = false, -- Enable on audio playback - hwdec = true, -- 启用硬解加速 - direct_io = true, -- Windows only: use native Windows API to write to pipe (requires LuaJIT) + spawn_first = false, -- Spawn thumbnailer on file load for faster initial thumbnails + quit_after_inactivity = 0, -- Close thumbnailer process after an inactivity period in seconds, 0 to disable + network = false, -- Enable on network playback + audio = false, -- Enable on audio playback + hwdec = true, -- 启用硬解加速 + direct_io = true, -- Windows only: use native Windows API to write to pipe (requires LuaJIT) - sw_threads = 2, -- 软解线程 - binpath = "mpv", -- 自定义mpv路径 - min_duration = 0, -- 对短视频关闭预览(秒) - precise = "auto", -- 预览精度 - frequency = 0.1, -- 解码频率(秒) - auto_run = true, -- 自动运行 + sw_threads = 2, -- 软解线程 + binpath = "mpv", -- 自定义mpv路径 + min_duration = 0, -- 对短视频关闭预览(秒) + precise = 0, -- 预览精度 + quality = 1, -- 预览质量 + frequency = 0.1, -- 解码频率(秒) + auto_run = true, -- 自动运行 } @@ -40,6 +42,8 @@ mp.utils = require "mp.utils" mp.options = require "mp.options" mp.options.read_options(options) +local properties = {} + function subprocess(args, async, callback) callback = callback or function() end @@ -102,10 +106,15 @@ if options.direct_io then end end +local file = nil +local file_bytes = 0 local spawned = false -local network = false local disabled = false local spawn_waiting = false +local spawn_working = false +local script_written = false + 
+local dirty = false local x = nil local y = nil @@ -130,26 +139,8 @@ local has_vid = 0 local file_timer = nil local file_check_period = 1/60 -local first_file = false - -local function debounce(func, wait) - func = type(func) == "function" and func or function() end - wait = type(wait) == "number" and wait / 1000 or 0 - local timer = nil - local timer_end = function () - timer:kill() - timer = nil - func() - end - - return function () - if timer then - timer:kill() - end - timer = mp.add_timeout(wait, timer_end) - end -end +local mac_bundle_mode = false local client_script = [=[ #!/usr/bin/env bash @@ -179,23 +170,22 @@ local function get_os() raw_os_name = (raw_os_name):lower() local os_patterns = { - ["windows"] = "Windows", - - ["linux"] = "Linux", + ["windows"] = "windows", + ["linux"] = "linux", - ["osx"] = "Mac", - ["mac"] = "Mac", - ["darwin"] = "Mac", + ["osx"] = "darwin", + ["mac"] = "darwin", + ["darwin"] = "darwin", - ["^mingw"] = "Windows", - ["^cygwin"] = "Windows", + ["^mingw"] = "windows", + ["^cygwin"] = "windows", - ["bsd$"] = "Mac", - ["sunos"] = "Mac" + ["bsd$"] = "darwin", + ["sunos"] = "darwin" } - -- Default to linux - local str_os_name = "Linux" + -- 默认为WIN + local str_os_name = "windows" for pattern, name in pairs(os_patterns) do if raw_os_name:match(pattern) then @@ -207,10 +197,10 @@ local function get_os() return str_os_name end -local os_name = get_os() +local os_name = mp.get_property("platform") or get_os() if options.socket == "" then - if os_name == "Windows" then + if os_name == "windows" then options.socket = "thumbfast" else options.socket = "/tmp/thumbfast" @@ -218,7 +208,7 @@ if options.socket == "" then end if options.tnpath == "" then - if os_name == "Windows" then + if os_name == "windows" then options.tnpath = os.getenv("TEMP").."\\thumbfast.out" else options.tnpath = "/tmp/thumbfast.out" @@ -231,7 +221,7 @@ options.socket = options.socket .. unique options.tnpath = options.tnpath .. 
unique if options.direct_io then - if os_name == "Windows" then + if os_name == "windows" then winapi.socket_wc = winapi.MultiByteToWideChar("\\\\.\\pipe\\" .. options.socket) end @@ -242,17 +232,18 @@ end local mpv_path = options.binpath -if os_name == "Mac" and options.binpath == "bundle" and unique then +if os_name == "darwin" and options.binpath == "bundle" and unique then mpv_path = string.gsub(subprocess({"ps", "-o", "comm=", "-p", tostring(unique)}).stdout, "[\n\r]", "") mpv_path = string.gsub(mpv_path, "/mpv%-bundle$", "/mpv") + mac_bundle_mode = true end local function calc_dimensions() - local width = mp.get_property_number("video-params/w") - local height = mp.get_property_number("video-params/h") + local width = properties["video-params"] and properties["video-params"]["w"] + local height = properties["video-params"] and properties["video-params"]["h"] if not width or not height then return end - local scale = mp.get_property_number("display-hidpi-scale", 1) + local scale = properties["display-hidpi-scale"] or 1 if width / height > options.max_width / options.max_height then effective_w = math.floor(options.max_width * scale + 0.5) @@ -268,15 +259,13 @@ local info_timer = nil local auto_run = options.auto_run local function info(w, h) - local display_w, display_h = w, h + local short_video = mp.get_property_number("duration", 0) <= options.min_duration + local image = properties["current-tracks"] and properties["current-tracks"]["video"] and properties["current-tracks"]["video"]["image"] + local albumart = image and properties["current-tracks"]["video"]["albumart"] - network = mp.get_property_bool("demuxer-via-network", false) - local image = mp.get_property_native("current-tracks/video/image", true) - local albumart = image and mp.get_property_native("current-tracks/video/albumart", false) - local short_video = mp.get_property_native("duration", 0) <= options.min_duration disabled = (w or 0) == 0 or (h or 0) == 0 or has_vid == 0 or - (network and not 
options.network) or + (properties["demuxer-via-network"] and not options.network) or (albumart and not options.audio) or (image and not albumart) or (short_video and options.min_duration > 0) @@ -292,71 +281,140 @@ local function info(w, h) info_timer = mp.add_timeout(0.05, function() info(w, h) end) end - local json, err = mp.utils.format_json({width=display_w, height=display_h, disabled=disabled, available=true, socket=options.socket, tnpath=options.tnpath, overlay_id=options.overlay_id}) - mp.commandv("script-message", "thumbfast-info", json) + local json, err = mp.utils.format_json({width=w, height=h, disabled=disabled, available=true, socket=options.socket, tnpath=options.tnpath, overlay_id=options.overlay_id}) + mp.command_native_async({"script-message", "thumbfast-info", json}, function() end) end local function remove_thumbnail_files() + if file then + file:close() + file = nil + file_bytes = 0 + end os.remove(options.tnpath) os.remove(options.tnpath..".bgra") end +local activity_timer + +local scale_sw = "fast-bilinear" +local vf_str + +if options.quality == 0 then + if options.precise == 2 then + options.quality = 2 + elseif options.precise == 0 then + options.quality = 1 + elseif options.precise == 1 then + options.quality = 1 + end + if options.sw_threads >= 4 then + options.quality = 2 + elseif options.sw_threads == 1 then + options.quality = 1 + end +end + +if options.quality == 2 then + scale_sw = "bicublin" +end + +local function quality() + local vf_str_suffix = "format=fmt=bgra" + local vf_str_pre + if options.quality == 1 then + vf_str = "scale=w="..effective_w..":h="..effective_h..":flags=fast_bilinear,"..vf_str_suffix + elseif options.quality == 2 then + vf_str_pre = "scale=w="..effective_w..":h="..effective_h..":flags=bicublin," + vf_str = vf_str_pre..vf_str_suffix + if mp.get_property_number("video-params/sig-peak", 1) > 1 then + vf_str = 
vf_str_pre.."format=fmt=gbrapf32,zscale=transfer=linear,tonemap=tonemap=mobius:desat=8.0,zscale=transfer=709,"..vf_str_suffix + end + end + print(vf_str) + return vf_str +end + local function spawn(time) if disabled then return end - local path = mp.get_property("path") + local path = properties["path"] if path == nil then return end - local open_filename = mp.get_property("stream-open-filename") - local ytdl = open_filename and network and path ~= open_filename + if options.quit_after_inactivity > 0 then + if show_thumbnail or activity_timer:is_enabled() then + activity_timer:kill() + end + activity_timer:resume() + end + + local open_filename = properties["stream-open-filename"] + local ytdl = open_filename and properties["demuxer-via-network"] and path ~= open_filename if ytdl then path = open_filename end remove_thumbnail_files() - local vid = mp.get_property_number("vid") + local vid = properties["vid"] has_vid = vid or 0 local args = { - mpv_path, path, "--config=no", "--terminal=no", "--msg-level=all=no", "--idle=yes", "--keep-open=always","--pause=yes", "--ao=null", "--vo=null", + mpv_path, "--config=no", "--terminal=no", "--msg-level=all=no", "--idle=yes", "--keep-open=always","--pause=yes", "--ao=null", "--vo=null", "--load-auto-profiles=no", "--load-osd-console=no", "--load-stats-overlay=no", "--osc=no", "--vd-lavc-skiploopfilter=all", "--vd-lavc-skipidct=all", "--vd-lavc-software-fallback=1", "--vd-lavc-fast", "--vd-lavc-threads="..options.sw_threads, "--hwdec="..(options.hwdec and "auto" or "no"), - "--edition="..(mp.get_property_number("edition") or "auto"), "--vid="..(vid or "auto"), "--sub=no", "--audio=no", "--sub-auto=no", "--audio-file-auto=no", + "--edition="..(properties["edition"] or "auto"), "--vid="..(vid or "auto"), "--sub=no", "--audio=no", "--sub-auto=no", "--audio-file-auto=no", "--start="..time, "--ytdl-format=worst", "--demuxer-readahead-secs=0", "--demuxer-max-bytes=128KiB", "--gpu-dumb-mode=yes", "--tone-mapping=clip", 
"--hdr-compute-peak=no", - "--sws-scaler=fast-bilinear", "--sws-fast=yes", "--sws-allow-zimg=no", + "--sws-allow-zimg=no", "--sws-fast=yes", "--sws-scaler="..scale_sw, "--audio-pitch-correction=no", - "--vf=".."scale=w="..effective_w..":h="..effective_h..":flags=fast_bilinear,format=bgra", + "--vf="..quality(), "--ovc=rawvideo", "--of=image2", "--ofopts=update=1", "--ocopy-metadata=no", "--o="..options.tnpath } - if os_name == "Windows" then + if mac_bundle_mode then + table.insert(args, "--macos-app-activation-policy=accessory") + end + + if os_name == "windows" then table.insert(args, "--input-ipc-server="..options.socket) - else + elseif not script_written then local client_script_path = options.socket..".run" - local file = io.open(client_script_path, "w+") - if file == nil then + local script = io.open(client_script_path, "w+") + if script == nil then mp.msg.error("client script write failed") return else - file:write(string.format(client_script, options.socket)) - file:close() + script_written = true + script:write(string.format(client_script, options.socket)) + script:close() subprocess({"chmod", "+x", client_script_path}, true) - table.insert(args, "--script="..client_script_path) + table.insert(args, "--scripts="..client_script_path) end + else + local client_script_path = options.socket..".run" + table.insert(args, "--scripts="..client_script_path) end + table.insert(args, path) + spawned = true spawn_waiting = true subprocess(args, true, function(success, result) - if spawn_waiting and (success == false or result.status ~= 0) then + if spawn_waiting and (success == false or (result.status ~= 0 and result.status ~= -2)) then + spawned = false + spawn_waiting = false mp.msg.error("mpv subprocess create failed") + if not spawn_working then -- notify users of required configuration + mp.commandv("show-text", "thumbfast 子进程创建失败!", 5) + end + elseif success == true and result.status == 0 then + spawn_working = true + spawn_waiting = false end - spawned = false 
end ) end @@ -376,27 +434,34 @@ local function run(command) return end - local file = nil - if os_name == "Windows" then - file = io.open("\\\\.\\pipe\\"..options.socket, "r+") - else + local command_n = command.."\n" + + if os_name == "windows" then + if file and file_bytes + #command_n >= 4096 then + file:close() + file = nil + file_bytes = 0 + end + if not file then + file = io.open("\\\\.\\pipe\\"..options.socket, "r+b") + end + elseif not file then file = io.open(options.socket, "r+") end - if file ~= nil then - file:seek("end") - file:write(command.."\n") - file:close() + if file then + file_bytes = file:seek("end") + file:write(command_n) + file:flush() end end local function draw(w, h, script) if not w or not show_thumbnail then return end - local display_w, display_h = w, h if x ~= nil then - mp.command_native({name = "overlay-add", id=options.overlay_id, x=x, y=y, file=options.tnpath..".bgra", offset=0, fmt="bgra", w=display_w, h=display_h, stride=(4*display_w)}) + mp.command_native_async({name = "overlay-add", id=options.overlay_id, x=x, y=y, file=options.tnpath..".bgra", offset=0, fmt="bgra", w=w, h=h, stride=(4*w)}, function() end) elseif script then - local json, err = mp.utils.format_json({width=display_w, height=display_h, x=x, y=y, socket=options.socket, tnpath=options.tnpath, overlay_id=options.overlay_id}) + local json, err = mp.utils.format_json({width=w, height=h, x=x, y=y, socket=options.socket, tnpath=options.tnpath, overlay_id=options.overlay_id}) mp.commandv("script-message-to", script, "thumbfast-render", json) end end @@ -426,7 +491,7 @@ local function real_res(req_w, req_h, filesize) end local function move_file(from, to) - if os_name == "Windows" then + if os_name == "windows" then os.remove(to) end -- move the file because it can get overwritten while overlay-add is reading it, and crash the player @@ -435,9 +500,9 @@ end local function seek(fast) if last_seek_time then - if options.precise == true then run("async seek " .. 
last_seek_time .. " absolute+exact") - elseif options.precise == false then run("async seek " .. last_seek_time .. " absolute+keyframes") - elseif options.precise == "auto" then + if options.precise == 2 then run("async seek " .. last_seek_time .. " absolute+exact") + elseif options.precise == 1 then run("async seek " .. last_seek_time .. " absolute+keyframes") + elseif options.precise == 0 then run("async seek " .. last_seek_time .. (fast and " absolute+keyframes" or " absolute+exact")) end end @@ -479,10 +544,6 @@ local function check_new_thumb() local finfo = mp.utils.file_info(tmp) if not finfo then return false end spawn_waiting = false - if first_file then - request_seek() - first_file = false - end local w, h = real_res(effective_w, effective_h, finfo.size) if w then -- only accept valid thumbnails move_file(tmp, options.tnpath..".bgra") @@ -492,6 +553,9 @@ local function check_new_thumb() last_real_w, last_real_h = real_w, real_h info(real_w, real_h) end + if not show_thumbnail then + file_timer:kill() + end return true end return false @@ -504,6 +568,38 @@ file_timer = mp.add_periodic_timer(file_check_period, function() end) file_timer:kill() +local function clear() + file_timer:kill() + seek_timer:kill() + if options.quit_after_inactivity > 0 then + if show_thumbnail or activity_timer:is_enabled() then + activity_timer:kill() + end + activity_timer:resume() + end + last_seek_time = nil + show_thumbnail = false + last_x = nil + last_y = nil + if script_name then return end + mp.command_native_async({name = "overlay-remove", id=options.overlay_id}, function() end) +end + +local function quit() + activity_timer:kill() + if show_thumbnail then + activity_timer:resume() + return + end + run("quit") + spawned = false + real_w, real_h = nil, nil + clear() +end + +activity_timer = mp.add_timeout(options.quit_after_inactivity, quit) +activity_timer:kill() + local function thumb(time, r_x, r_y, script) if disabled then return end @@ -524,6 +620,13 @@ local function 
thumb(time, r_x, r_y, script) draw(real_w, real_h, script) end + if options.quit_after_inactivity > 0 then + if show_thumbnail or activity_timer:is_enabled() then + activity_timer:kill() + end + activity_timer:resume() + end + if time == last_seek_time then return end last_seek_time = time if not spawned then spawn(time) end @@ -531,18 +634,10 @@ local function thumb(time, r_x, r_y, script) if not file_timer:is_enabled() then file_timer:resume() end end -local function clear() - file_timer:kill() - seek_timer:kill() - last_seek = 0 - show_thumbnail = false - last_x = nil - last_y = nil - if script_name then return end - mp.command_native({name = "overlay-remove", id=options.overlay_id}) -end - local function watch_changes() + if not dirty or not properties["video-params"] then return end + dirty = false + local old_w = effective_w local old_h = effective_h @@ -559,19 +654,45 @@ local function watch_changes() if spawned then if resized then -- mpv doesn't allow us to change output size + local seek_time = last_seek_time run("quit") clear() spawned = false - spawn(last_seek_time or mp.get_property_number("time-pos", 0)) + spawn(seek_time or mp.get_property_number("time-pos", 0)) + file_timer:resume() end end last_has_vid = has_vid + + if not spawned and not disabled and options.spawn_first and resized then + spawn(mp.get_property_number("time-pos", 0)) + file_timer:resume() + end +end + +local function update_property(name, value) + properties[name] = value end -local watch_changes_debounce = debounce(watch_changes, 500) +local function update_property_dirty(name, value) + properties[name] = value + dirty = true +end + +local function update_tracklist(name, value) + -- current-tracks shim + for _, track in ipairs(value) do + if track.type == "video" and track.selected then + properties["current-tracks/video/image"] = track.image + properties["current-tracks/video/albumart"] = track.albumart + return + end + end +end local function sync_changes(prop, val) + 
update_property(prop, val) if val == nil then return end if type(val) == "boolean" then @@ -592,11 +713,12 @@ local function sync_changes(prop, val) if not spawned then return end run("set "..prop.." "..val) - watch_changes_debounce() + dirty = true end local function file_load() clear() + spawned = false real_w, real_h = nil, nil last_real_w, last_real_h = nil, nil last_seek_time = nil @@ -607,28 +729,27 @@ local function file_load() calc_dimensions() info(effective_w, effective_h) - if disabled then return end - - spawned = false - if options.spawn_first then - mp.add_timeout(0.1, function() - spawn(mp.get_property_number("time-pos", 0)) - first_file = true - end) - end end local function shutdown() run("quit") remove_thumbnail_files() - if os_name ~= "Windows" then + if os_name ~= "windows" then os.remove(options.socket) os.remove(options.socket..".run") end end -mp.observe_property("display-hidpi-scale", "native", watch_changes) -mp.observe_property("video-out-params", "native", watch_changes) +mp.observe_property("current-tracks", "native", function(name, value) + update_property(name, value) +end) + +mp.observe_property("track-list", "native", update_tracklist) +mp.observe_property("display-hidpi-scale", "native", update_property_dirty) +mp.observe_property("video-params", "native", update_property_dirty) +mp.observe_property("demuxer-via-network", "native", update_property) +mp.observe_property("stream-open-filename", "native", update_property) +mp.observe_property("path", "native", update_property) mp.observe_property("vid", "native", sync_changes) mp.observe_property("edition", "native", sync_changes) @@ -657,3 +778,5 @@ mp.add_key_binding(nil, "thumb_toggle", function() mp.osd_message("缩略图功能已启用", 2) end end) + +mp.register_idle(watch_changes) diff --git a/portable_config/scripts/uosc/elements/Logo.lua b/portable_config/scripts/uosc/elements/Logo.lua new file mode 100644 index 00000000..873794f4 --- /dev/null +++ 
b/portable_config/scripts/uosc/elements/Logo.lua @@ -0,0 +1,61 @@ +-- 存在问题(也许不算):无法实时自适应缩放 + +local Element = require('elements/Element') + +--[[ Logo ]] + +---@class Logo : Element +local Logo = class(Element) + +function Logo:new() return Class.new(self) --[[@as Logo]] end +function Logo:init() + Element.init(self, 'logo') + self.enabled = state.is_idle + + self.logo_lines = { + -- White border + '{\\c&HE5E5E5&\\p5}m 895 10 b 401 10 0 410 0 905 0 1399 401 1800 895 1800 1390 1800 1790 1399 1790 905 1790 410 1390 10 895 10 {\\p0}', + -- Purple fill + '{\\c&H682167&\\p5}m 925 42 b 463 42 87 418 87 880 87 1343 463 1718 925 1718 1388 1718 1763 1343 1763 880 1763 418 1388 42 925 42{\\p0}', + -- Darker fill + '{\\c&H430142&\\p5}m 1605 828 b 1605 1175 1324 1456 977 1456 631 1456 349 1175 349 828 349 482 631 200 977 200 1324 200 1605 482 1605 828{\\p0}', + -- White fill + '{\\c&HDDDBDD&\\p5}m 1296 910 b 1296 1131 1117 1310 897 1310 676 1310 497 1131 497 910 497 689 676 511 897 511 1117 511 1296 689 1296 910{\\p0}', + -- Triangle + '{\\c&H691F69&\\p5}m 762 1113 l 762 708 b 881 776 1000 843 1119 911 1000 978 881 1046 762 1113{\\p0}', + } + +end + +function Logo:decide_enabled() self.enabled = state.idlescreen and state.is_idle end +function Logo:on_prop_is_idle() self:decide_enabled() end +function Logo:on_prop_idlescreen() self:decide_enabled() end + +function Logo:render() + if Menu:is_open() then return end + + local ass = assdraw.ass_new() + + -- logo is rendered at 2^(5-1) = 16 times resolution with size 1800x1800 + local logo_size, font_size, spacing = 1800 / 16, 40, 10 + local total_height = logo_size + font_size + spacing + local icon_x, icon_y = (display.width - logo_size) / 2, (display.height - total_height) / 2 + local line_prefix = ('{\\rDefault\\an7\\1a&H00&\\bord0\\shad0\\pos(%f,%f)}'):format(icon_x, icon_y) + + -- mpv logo + for _, line in ipairs(self.logo_lines) do + ass:new_event() + ass:append(line_prefix .. 
line) + end + + if options.idlemsg == 'default' then + state.idlemsg = '' + else + state.idlemsg = options.idlemsg + end + ass:txt(display.width / 2, icon_y + logo_size + spacing, 8, tostring(state.idlemsg), {size = font_size}) + + return ass +end + +return Logo diff --git a/portable_config/scripts/uosc/elements/Menu.lua b/portable_config/scripts/uosc/elements/Menu.lua index 4048b76b..99d736f3 100644 --- a/portable_config/scripts/uosc/elements/Menu.lua +++ b/portable_config/scripts/uosc/elements/Menu.lua @@ -156,7 +156,7 @@ function Menu:update(data) -- Update items local first_active_index = nil - menu.items = {} + menu.items = {} -- {{title = lang._menu_item_empty_title, value = 'ignore', italic = 'true', muted = 'true'}} for i, item_data in ipairs(menu_data.items or {}) do if item_data.active and not first_active_index then first_active_index = i end @@ -506,7 +506,7 @@ function Menu:move_selected_item_to(index) if callback and from and from ~= index and index >= 1 and index <= #self.current.items then callback(from, index, self.current.submenu_path) self.current.selected_index = index - request_render() + self:set_scroll_by((index - from) * self.scroll_step) end end diff --git a/portable_config/scripts/uosc/elements/Timeline.lua b/portable_config/scripts/uosc/elements/Timeline.lua index 55a72aad..ebcd2684 100644 --- a/portable_config/scripts/uosc/elements/Timeline.lua +++ b/portable_config/scripts/uosc/elements/Timeline.lua @@ -394,7 +394,7 @@ function Timeline:render() then local scale_x, scale_y = display.scale_x, display.scale_y local border, margin_x, margin_y = math.ceil(2 * scale_x), round(10 * scale_x), round(5 * scale_y) - local thumb_x_margin, thumb_y_margin = border + margin_x, border + margin_y + local thumb_x_margin, thumb_y_margin = border + margin_x + bax, border + margin_y local thumb_width, thumb_height = thumbnail.width, thumbnail.height local thumb_x = round(clamp( thumb_x_margin, cursor_x * scale_x - thumb_width / 2, diff --git 
a/portable_config/scripts/uosc/elements/TopBar.lua b/portable_config/scripts/uosc/elements/TopBar.lua index 7258785c..b15121fc 100644 --- a/portable_config/scripts/uosc/elements/TopBar.lua +++ b/portable_config/scripts/uosc/elements/TopBar.lua @@ -85,6 +85,10 @@ function TopBar:decide_titles() self.alt_title = state.alt_title ~= '' and state.alt_title or nil self.main_title = state.title ~= '' and state.title or nil + if (self.main_title == 'No file') then + self.main_title = lang._border_title + end + -- Fall back to alt title if main is empty if not self.main_title then self.main_title, self.alt_title = self.alt_title, nil diff --git a/portable_config/scripts/uosc/lib/ass.lua b/portable_config/scripts/uosc/lib/ass.lua index 108953f1..9f7132ce 100644 --- a/portable_config/scripts/uosc/lib/ass.lua +++ b/portable_config/scripts/uosc/lib/ass.lua @@ -85,7 +85,7 @@ function ass_mt:tooltip(element, value, opts) local align_top = opts.responsive == false or element.ay - offset > opts.size * 2 local x = element.ax + (element.bx - element.ax) / 2 local y = align_top and element.ay - offset or element.by + offset - local margin = (opts.width_overwrite or text_width(value, opts)) / 2 + 10 + local margin = (opts.width_overwrite or text_width(value, opts)) / 2 + 10 + Elements.window_border.size self:txt(clamp(margin, x, display.width - margin), y, align_top and 2 or 8, value, opts) end diff --git a/portable_config/scripts/uosc/lib/lang.lua b/portable_config/scripts/uosc/lib/lang.lua index 875804b9..fdce8d44 100644 --- a/portable_config/scripts/uosc/lib/lang.lua +++ b/portable_config/scripts/uosc/lib/lang.lua @@ -2,35 +2,38 @@ lang = { -- context_menu_default - _load = '加载', - _file_browser = '※ 文件浏览器', - _import_sid = '※ 导入 字幕轨', - _navigation = '导航', - _playlist = '※ 播放列表', - _edition_list = '※ 版本列表', - _chapter_list = '※ 章节列表', - _vid_list = '※ 视频轨列表', - _aid_list = '※ 音频轨列表', - _sid_list = '※ 字幕轨列表', - _playlist_shuffle = '播放列表乱序重排', - _ushot = '※ 截屏', - _VIDEO = '视频', - 
_decoding_api = '切换 解码模式', - _deband_toggle = '切换 去色带状态', - _deint_toggle = '切换 去隔行状态', - _icc_toggle = '切换 自动校色', - _corpts_toggle = '切换 时间码解析模式', - _TOOLS = '工具', - _stats_toggle = '开关 常驻统计信息', - _console_on = '显示控制台', - _border_toggle = '切换 窗口边框', - _ontop_toggle = '切换 窗口置顶', - _audio_device = '※ 音频输出设备列表', - _stream_quality = '※ 流式传输品质', - _show_file_dir = '※ 打开 当前文件所在路径', - _show_config_dir = '※ 打开 设置目录', - _stop = '停止', - _quit = '退出mpv', + _cm_load = '加载', + _cm_file_browser = '※ 文件浏览器', + _cm_import_sid = '※ 导入 字幕轨', + _cm_navigation = '导航', + _cm_playlist = '※ 播放列表', + _cm_edition_list = '※ 版本列表', + _cm_chapter_list = '※ 章节列表', + _cm_vid_list = '※ 视频轨列表', + _cm_aid_list = '※ 音频轨列表', + _cm_sid_list = '※ 字幕轨列表', + _cm_playlist_shuffle = '播放列表乱序重排', + _cm_ushot = '※ 截屏', + _cm_video = '视频', + _cm_decoding_api = '切换 解码模式', + _cm_deband_toggle = '切换 去色带状态', + _cm_deint_toggle = '切换 去隔行状态', + _cm_icc_toggle = '切换 自动校色', + _cm_corpts_toggle = '切换 时间码解析模式', + _cm_tools = '工具', + _cm_stats_toggle = '开关 常驻统计信息', + _cm_console_on = '显示控制台', + _cm_border_toggle = '切换 窗口边框', + _cm_ontop_toggle = '切换 窗口置顶', + _cm_audio_device = '※ 音频输出设备列表', + _cm_stream_quality = '※ 流式传输品质', + _cm_show_file_dir = '※ 打开 当前文件所在路径', + _cm_show_config_dir = '※ 打开 设置目录', + _cm_stop = '停止', + _cm_quit = '退出mpv', + + -- no-border-title + _border_title= '未加载文件', -- track_loaders sub_menu menu_data _sid_menu = '字幕轨', @@ -38,27 +41,32 @@ lang = { _vid_menu = '视频轨', _import_id_menu = '导入 ', + -- _menu_item_empty_title = '空', + _sid_submenu_title = '字幕轨列表', _aid_submenu_title = '音频轨列表', _vid_submenu_title = '视频轨列表', _playlist_submenu_title = '播放列表', _chapter_list_submenu_title = '章节列表', + _chapter_list_submenu_item_title = '未命名章节 ', _edition_list_submenu_title = '版本列表', _edition_list_submenu_item_title = '版本', _stream_quality_submenu_title = '流式传输品质', _audio_device_submenu_title = '音频输出设备列表', + _audio_device_submenu_item_title = '自动', _submenu_import = '导入', _submenu_load_file = '打开文件', 
_submenu_id_disabled = '禁用', + _submenu_id_hint = '声道', _submenu_id_forced = '强制', _submenu_id_default = '默认', _submenu_id_external = '外挂', _submenu_id_title = '轨道 ', + _submenu_file_browser_item_hint = '驱动器列表', + _submenu_file_browser_item_hint2 = '上级目录', + _submenu_file_browser_item2_hint = '盘符', _submenu_file_browser_title = '驱动器列表', - _submenu_file_browser_item_title = '上级目录', - _submenu_file_browser_item2_title = '盘符', - _submenu_file_browser_item3_title = '驱动器列表', -- built-in_shortcut _button01 = '菜单', diff --git a/portable_config/scripts/uosc/lib/menus.lua b/portable_config/scripts/uosc/lib/menus.lua index 5855bbd9..17c5c6e2 100644 --- a/portable_config/scripts/uosc/lib/menus.lua +++ b/portable_config/scripts/uosc/lib/menus.lua @@ -106,7 +106,7 @@ function create_select_tracklist_type_menu_opener(menu_title, track_type, track_ end if track['demux-fps'] then h(string.format('%.5gfps', track['demux-fps'])) end h(track.codec) - if track['audio-channels'] then h(track['audio-channels'] .. ' channels') end + if track['audio-channels'] then h(track['audio-channels'] .. 
lang._submenu_id_hint) end if track['demux-samplerate'] then h(string.format('%.3gkHz', track['demux-samplerate'] / 1000)) end if track.forced then h(lang._submenu_id_forced) end if track.default then h(lang._submenu_id_default) end @@ -181,10 +181,10 @@ function open_file_navigation_menu(directory_path, handle_select, opts) if is_root then if state.platform == 'windows' then - items[#items + 1] = {title = '..', hint = lang._submenu_file_browser_title, value = '{drives}', separator = true} + items[#items + 1] = {title = '..', hint = lang._submenu_file_browser_item_hint, value = '{drives}', separator = true} end else - items[#items + 1] = {title = '..', hint = lang._submenu_file_browser_item_title, value = directory.dirname, separator = true} + items[#items + 1] = {title = '..', hint = lang._submenu_file_browser_item_hint2, value = directory.dirname, separator = true} end local back_path = items[#items] and items[#items].value @@ -276,7 +276,7 @@ function open_drives_menu(handle_select, opts) if drive then local drive_path = normalize_path(drive) items[#items + 1] = { - title = drive, hint = lang._submenu_file_browser_item2_title, value = drive_path, active = opts.active_path == drive_path, + title = drive, hint = lang._submenu_file_browser_item2_hint, value = drive_path, active = opts.active_path == drive_path, } if opts.selected_path == drive_path then selected_index = #items end end @@ -286,7 +286,7 @@ function open_drives_menu(handle_select, opts) end return Menu:open( - {type = opts.type, title = opts.title or lang._submenu_file_browser_item3_title, items = items, selected_index = selected_index}, + {type = opts.type, title = opts.title or lang._submenu_file_browser_title, items = items, selected_index = selected_index}, handle_select ) end diff --git a/portable_config/scripts/uosc/lib/text.lua b/portable_config/scripts/uosc/lib/text.lua index d573b816..eca4de2a 100644 --- a/portable_config/scripts/uosc/lib/text.lua +++ 
b/portable_config/scripts/uosc/lib/text.lua @@ -87,7 +87,9 @@ local function utf8_to_unicode(str, i) unicode = char_byte * (2 ^ 6) ^ (byte_count - 1) end for j = 2, byte_count do - char_byte = str:byte(i + j - 1) - 0x80 + if i + j - 1 <= #str then -- 临时修复 https://github.com/tomasklaen/uosc/issues/515 + char_byte = str:byte(i + j - 1) - 0x80 + end unicode = unicode + char_byte * (2 ^ 6) ^ (byte_count - j) end return round(unicode) diff --git a/portable_config/scripts/uosc/lib/utils.lua b/portable_config/scripts/uosc/lib/utils.lua index e07c10d3..43892c98 100644 --- a/portable_config/scripts/uosc/lib/utils.lua +++ b/portable_config/scripts/uosc/lib/utils.lua @@ -539,7 +539,7 @@ function normalize_chapters(chapters) table.sort(chapters, function(a, b) return a.time < b.time end) -- Ensure titles for index, chapter in ipairs(chapters) do - chapter.title = chapter.title or ('Chapter ' .. index) + chapter.title = chapter.title or (lang._chapter_list_submenu_item_title .. index) chapter.lowercase_title = chapter.title:lower() end return chapters diff --git a/portable_config/scripts/uosc/main.lua b/portable_config/scripts/uosc/main.lua index 6b0d589f..b411b9c0 100644 --- a/portable_config/scripts/uosc/main.lua +++ b/portable_config/scripts/uosc/main.lua @@ -1,6 +1,6 @@ --[[ SOURCE_ https://github.com/tomasklaen/uosc/tree/main/scripts -COMMIT_ ec52252380f896ca709216307e3bf021fbee914b +COMMIT_ 5e2c93055155bc9aec7534d13804d4f0d7f8a72d 文档_ https://github.com/hooke007/MPV_lazy/discussions/186 极简主义设计驱动的多功能界面脚本群组,兼容 thumbfast 新缩略图引擎 @@ -112,6 +112,8 @@ defaults = { chapter_ranges = 'openings:30abf964,endings:30abf964,ads:c54e4e80', chapter_range_patterns = 'openings:オープニング;endings:エンディング', + idlescreen = true, + idlemsg = 'default', idle_call_menu = 0, -- 空闲自动弹出上下文菜单 custom_font = 'default', -- 自定义界面字体 } @@ -145,45 +147,47 @@ function auto_ui_scale() options.ui_scale = 1 end end +-- 设置脚本属性 +mp.set_property_native('user-data/osc', { idlescreen = options.idlescreen }) --[[ CONFIG 
]] -- 上下文菜单的默认内容 local function create_default_menu() return { - {title = lang._load, items = { - {title = lang._file_browser, value = 'script-binding uosc/open-file'}, - {title = lang._import_sid, value = 'script-binding uosc/load-subtitles'}, + {title = lang._cm_load, items = { + {title = lang._cm_file_browser, value = 'script-binding uosc/open-file'}, + {title = lang._cm_import_sid, value = 'script-binding uosc/load-subtitles'}, },}, - {title = lang._navigation, items = { - {title = lang._playlist, value = 'script-binding uosc/playlist'}, - {title = lang._edition_list, value = 'script-binding uosc/editions'}, - {title = lang._chapter_list, value = 'script-binding uosc/chapters'}, - {title = lang._vid_list, value = 'script-binding uosc/video'}, - {title = lang._aid_list, value = 'script-binding uosc/audio'}, - {title = lang._sid_list, value = 'script-binding uosc/subtitles'}, - {title = lang._playlist_shuffle, value = 'playlist-shuffle'}, + {title = lang._cm_navigation, items = { + {title = lang._cm_playlist, value = 'script-binding uosc/playlist'}, + {title = lang._cm_edition_list, value = 'script-binding uosc/editions'}, + {title = lang._cm_chapter_list, value = 'script-binding uosc/chapters'}, + {title = lang._cm_vid_list, value = 'script-binding uosc/video'}, + {title = lang._cm_aid_list, value = 'script-binding uosc/audio'}, + {title = lang._cm_sid_list, value = 'script-binding uosc/subtitles'}, + {title = lang._cm_playlist_shuffle, value = 'playlist-shuffle'}, },}, - {title = lang._ushot, value = 'script-binding uosc/shot'}, - {title = lang._VIDEO, items = { - {title = lang._decoding_api, value = 'cycle-values hwdec no auto auto-copy'}, - {title = lang._deband_toggle, value = 'cycle deband'}, - {title = lang._deint_toggle, value = 'cycle deinterlace'}, - {title = lang._icc_toggle, value = 'cycle icc-profile-auto'}, - {title = lang._corpts_toggle, value = 'cycle correct-pts'}, + {title = lang._cm_ushot, value = 'script-binding uosc/shot'}, + {title = 
lang._cm_video, items = { + {title = lang._cm_decoding_api, value = 'cycle-values hwdec no auto auto-copy'}, + {title = lang._cm_deband_toggle, value = 'cycle deband'}, + {title = lang._cm_deint_toggle, value = 'cycle deinterlace'}, + {title = lang._cm_icc_toggle, value = 'cycle icc-profile-auto'}, + {title = lang._cm_corpts_toggle, value = 'cycle correct-pts'}, },}, - {title = lang._TOOLS, items = { - {title = lang._stats_toggle, value = 'script-binding display-stats-toggle'}, - {title = lang._console_on, value = 'script-binding console/enable'}, - {title = lang._border_toggle, value = 'cycle border'}, - {title = lang._ontop_toggle, value = 'cycle ontop'}, - {title = lang._audio_device, value = 'script-binding uosc/audio-device'}, - {title = lang._stream_quality, value = 'script-binding uosc/stream-quality'}, - {title = lang._show_file_dir, value = 'script-binding uosc/show-in-directory'}, - {title = lang._show_config_dir, value = 'script-binding uosc/open-config-directory'}, + {title = lang._cm_tools, items = { + {title = lang._cm_stats_toggle, value = 'script-binding display-stats-toggle'}, + {title = lang._cm_console_on, value = 'script-binding console/enable'}, + {title = lang._cm_border_toggle, value = 'cycle border'}, + {title = lang._cm_ontop_toggle, value = 'cycle ontop'}, + {title = lang._cm_audio_device, value = 'script-binding uosc/audio-device'}, + {title = lang._cm_stream_quality, value = 'script-binding uosc/stream-quality'}, + {title = lang._cm_show_file_dir, value = 'script-binding uosc/show-in-directory'}, + {title = lang._cm_show_config_dir, value = 'script-binding uosc/open-config-directory'}, },}, - {title = lang._stop, value = 'stop'}, - {title = lang._quit, value = 'quit'}, + {title = lang._cm_stop, value = 'stop'}, + {title = lang._cm_quit, value = 'quit'}, } end @@ -439,6 +443,8 @@ state = { margin_left = 0, margin_right = 0, hidpi_scale = 1, + idlescreen = options.idlescreen, + idlemsg = options.idlemsg, } thumbnail = {width = 0, height = 
0, disabled = false} external = {} -- Properties set by external scripts @@ -512,17 +518,19 @@ end function update_margins() if display.height == 0 then return end - local function is_persistent(element) return element and element.enabled and element:is_persistent() end + local function causes_margin(element) + return element and element.enabled and (element:is_persistent() or element.min_visibility > 0.5) + end local timeline, top_bar, controls, volume = Elements.timeline, Elements.top_bar, Elements.controls, Elements.volume -- margins are normalized to window size local left, right, top, bottom = 0, 0, 0, 0 - if is_persistent(controls) then bottom = (display.height - controls.ay) / display.height - elseif is_persistent(timeline) then bottom = (display.height - timeline.ay) / display.height end + if causes_margin(controls) then bottom = (display.height - controls.ay) / display.height + elseif causes_margin(timeline) then bottom = (display.height - timeline.ay) / display.height end - if is_persistent(top_bar) then top = top_bar.title_by / display.height end + if causes_margin(top_bar) then top = top_bar.title_by / display.height end - if is_persistent(volume) then + if causes_margin(volume) then if options.volume == 'left' then left = volume.bx / display.width elseif options.volume == 'right' then right = volume.ax / display.width end end @@ -1041,7 +1049,7 @@ bind_command('show-in-directory', function() if state.platform == 'windows' then utils.subprocess_detached({args = {'explorer', '/select,', state.path}, cancellable = false}) - elseif state.platform == 'macos' then + elseif state.platform == 'darwin' then utils.subprocess_detached({args = {'open', '-R', state.path}, cancellable = false}) elseif state.platform == 'linux' then local result = utils.subprocess({args = {'nautilus', state.path}, cancellable = false}) @@ -1203,10 +1211,14 @@ bind_command('audio-device', create_self_updating_menu_opener({ local items = {} for _, device in ipairs(audio_device_list) do 
if device.name == 'auto' or string.match(device.name, '^' .. ao) then + local title = device.description + if title == 'Autoselect device' then + title = lang._audio_device_submenu_item_title + end local hint = string.match(device.name, ao .. '/(.+)') if not hint then hint = device.name end items[#items + 1] = { - title = device.description, + title = title, hint = hint, active = device.name == current_device, value = device.name, @@ -1226,7 +1238,7 @@ bind_command('open-config-directory', function() if state.platform == 'windows' then args = {'explorer', '/select,', config.path} - elseif state.platform == 'macos' then + elseif state.platform == 'darwin' then args = {'open', '-R', config.path} elseif state.platform == 'linux' then args = {'xdg-open', config.dirname} @@ -1324,9 +1336,24 @@ mp.register_script_message('set-min-visibility', function(visibility, elements) end) mp.register_script_message('flash-elements', function(elements) Elements:flash(split(elements, ' *, *')) end) mp.register_script_message('overwrite-binding', function(name, command) key_binding_overwrites[name] = command end) +if options.idlescreen then + mp.register_script_message('osc-idlescreen', function(mode, no_osd) + if mode == 'cycle' then mode = state.idlescreen and 'no' or 'yes' end + set_state('idlescreen', mode == 'yes') + utils.shared_script_property_set('osc-idlescreen', mode) + mp.set_property_native('user-data/osc', { idlescreen = state.idlescreen }) + + if not no_osd and mp.get_property_number('osd-level', 1) >= 1 then + mp.osd_message('LOGO的可见性:' .. 
tostring(mode)) + end + end) +end --[[ ELEMENTS ]] +if options.idlescreen then + require('elements/Logo'):new() +end require('elements/WindowBorder'):new() require('elements/BufferingIndicator'):new() require('elements/PauseIndicator'):new() diff --git a/portable_config/shaders/guided.glsl b/portable_config/shaders/guided.glsl index 3c804da3..bf3f9e2b 100644 --- a/portable_config/shaders/guided.glsl +++ b/portable_config/shaders/guided.glsl @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -//desc: Guided filter guided by the downscaled image +// Description: guided.glsl: Guided by the downscaled image /* The radius can be adjusted with the MEANI stage's downscaling factor. * Higher numbers give a bigger radius. @@ -33,10 +33,10 @@ //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB -//!DESC Guided filter (PREI) //!BIND HOOKED //!WIDTH HOOKED.w 1.25 / //!HEIGHT HOOKED.h 1.25 / +//!DESC Guided filter (PREI) //!SAVE PREI vec4 hook() @@ -47,10 +47,10 @@ vec4 hook() //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB -//!DESC Guided filter (I) //!BIND PREI -//!WIDTH HOOKED.w 1.0 / -//!HEIGHT HOOKED.h 1.0 / +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h +//!DESC Guided filter (I) //!SAVE I vec4 hook() @@ -58,6 +58,7 @@ vec4 hook() return PREI_texOff(0); } + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB diff --git a/portable_config/shaders/guided_lgc.glsl b/portable_config/shaders/guided_lgc.glsl index 816e4511..7ff3de39 100644 --- a/portable_config/shaders/guided_lgc.glsl +++ b/portable_config/shaders/guided_lgc.glsl @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -//desc: Luma-guided-chroma denoising. +// Description: guided_lgc.glsl: Luma-guided-chroma denoising. /* The radius can be adjusted with the MEANI stage's downscaling factor. * Higher numbers give a bigger radius. 
@@ -31,10 +31,10 @@ */ //!HOOK CHROMA -//!DESC Guided filter (I) //!BIND LUMA //!WIDTH LUMA.w //!HEIGHT LUMA.h +//!DESC Guided filter (I, share) //!SAVE I vec4 hook() @@ -42,6 +42,7 @@ vec4 hook() return LUMA_texOff(0); } + //!HOOK CHROMA //!DESC Guided filter (P) //!BIND HOOKED @@ -57,9 +58,9 @@ vec4 hook() //!HOOK CHROMA //!DESC Guided filter (MEANI) //!BIND I +//!SAVE MEANI //!WIDTH I.w 2.0 / //!HEIGHT I.h 2.0 / -//!SAVE MEANI vec4 hook() { diff --git a/portable_config/shaders/guided_s.glsl b/portable_config/shaders/guided_s.glsl index cc8f4467..a1c2c174 100644 --- a/portable_config/shaders/guided_s.glsl +++ b/portable_config/shaders/guided_s.glsl @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -//desc: "Self-guided" guided filter +// Description: guided_s.glsl: Self-guided /* The radius can be adjusted with the MEANIP stage's downscaling factor. * Higher numbers give a bigger radius. diff --git a/portable_config/shaders/nlmeans.glsl b/portable_config/shaders/nlmeans.glsl index ebcc5d35..655da37d 100644 --- a/portable_config/shaders/nlmeans.glsl +++ b/portable_config/shaders/nlmeans.glsl @@ -19,7 +19,7 @@ * along with this program. If not, see . */ -// Profile description: Default profile, general purpose, tuned for low noise +// Description: nlmeans.glsl: Default profile, general purpose, tuned for low noise /* The recommended usage of this shader and its variant profiles is to add them * to input.conf and then dispatch the appropriate shader via a keybind during @@ -48,8 +48,8 @@ * of noise. * * The denoiser will not work properly if the content has been upscaled - * beforehand, whether it was done by you or someone down the line. Consider - * issuing a command to downscale in the mpv console, like so: + * beforehand (whether it was done by you or not). In such cases, consider + * issuing a command to downscale in the mpv console (backtick ` key): * * vf toggle scale=-2:720 * @@ -65,12 +65,13 @@ * may be different for your system. 
* * If your GPU doesn't support textureGather, or if you are on a version of mpv - * prior to 0.35.0, then consider setting RI/RFI to 0, or try the LQ and VLQ - * profiles. + * prior to 0.35.0, then consider setting RI/RFI to 0, or try the LQ profile * - * textureGather is LUMA only and limited to the following configurations: + * If you plan on tinkering with NLM's settings, read below: * - * - PS={3,7}:P=3:PST=0:RI={0,1,3}:RFI={0,1,2}:M!=1 + * textureGather only applies to luma and is limited to these configurations: + * + * - PS={3,7}:P=3:PST=0:RI={0,1,3}:RFI={0,1,2} * - Default, very fast, rotations and reflections should be free * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. @@ -83,6 +84,7 @@ * * Options which always disable textureGather: * - PD + * - NG */ // The following is shader code injected from guided.glsl @@ -104,7 +106,7 @@ * along with this program. If not, see . */ -//desc: Guided filter guided by the downscaled image +// Description: guided.glsl: Guided by the downscaled image /* The radius can be adjusted with the MEANI stage's downscaling factor. * Higher numbers give a bigger radius. 
@@ -120,10 +122,10 @@ //!HOOK LUMA //!HOOK CHROMA -//!DESC Guided filter (PREI) //!BIND HOOKED //!WIDTH HOOKED.w 1.25 / //!HEIGHT HOOKED.h 1.25 / +//!DESC Guided filter (PREI) //!SAVE _INJ_PREI vec4 hook() @@ -133,10 +135,10 @@ vec4 hook() //!HOOK LUMA //!HOOK CHROMA -//!DESC Guided filter (I) //!BIND _INJ_PREI -//!WIDTH HOOKED.w 1.0 / -//!HEIGHT HOOKED.h 1.0 / +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h +//!DESC Guided filter (I) //!SAVE _INJ_I vec4 hook() @@ -144,6 +146,7 @@ vec4 hook() return _INJ_PREI_texOff(0); } + //!HOOK LUMA //!HOOK CHROMA //!DESC Guided filter (P) @@ -310,69 +313,52 @@ vec4 hook() return _INJ_MEANA_texOff(0) * HOOKED_texOff(0) + _INJ_MEANB_texOff(0); } -// End of source code injected from guided.glsl +// End of source code injected from guided.glsl + //!HOOK LUMA //!HOOK CHROMA -//!DESC Non-local means (downscale) -//!WIDTH LUMA.w 3 / -//!HEIGHT LUMA.h 3 / -//!BIND LUMA -//!SAVE EP +//!BIND RF_LUMA +//!WIDTH RF_LUMA.w +//!HEIGHT RF_LUMA.h +//!DESC Non-local means (RF, share) +//!SAVE RF vec4 hook() { - return LUMA_texOff(0); + return RF_LUMA_texOff(0); } //!HOOK LUMA //!HOOK CHROMA -//!DESC Non-local means (share) -//!BIND RF_LUMA -//!SAVE RF +//!BIND LUMA +//!WIDTH LUMA.w 3 / +//!HEIGHT LUMA.h 3 / +//!DESC Non-local means (EP) +//!SAVE EP vec4 hook() { - return RF_LUMA_texOff(0); + return LUMA_texOff(0); } //!HOOK LUMA //!HOOK CHROMA //!BIND HOOKED //!BIND RF_LUMA -//!BIND EP //!BIND RF +//!BIND EP //!DESC Non-local means (nlmeans.glsl) -/* User variables - * - * It is usually preferable to denoise chroma and luma differently, so the user - * variables for luma and chroma are split. - */ +// User variables -/* S = denoising factor - * P = patch size - * R = research size - * - * The denoising factor controls the level of blur, higher is blurrier. - * - * Patch size should usually be an odd number greater than or equal to 3. - * Higher values are slower and not always better. 
- * - * Research size usually be an odd number greater than or equal to 3. Higher - * values are usually better, but slower and offer diminishing returns. - * - * Even-numbered patch/research sizes will sample between pixels unless PS=6. - * It's not known whether this is ever useful behavior or not. This is - * incompatible with textureGather optimizations, so NG=1 to disable them. - */ +// It is generally preferable to denoise luma and chroma differently, so the +// user variables for luma and chroma are split. + +// Denoising factor (level of blur, higher means more blur) #ifdef LUMA_raw #define S 2.0 -#define P 3 -#define R 5 #else #define S 5.0 -#define P 3 -#define R 5 #endif /* Adaptive sharpening @@ -380,11 +366,16 @@ vec4 hook() * Uses the blur incurred by denoising to perform an unsharp mask, and uses the * weight map to restrict the sharpening to edges. * - * Use M=4 to get a good look at which areas are/aren't sharpened. + * If you just want to increase/decrease sharpness then you want to change ASF. + * + * Use V=4 to visualize which areas are sharpened (black means sharpen). 
* - * AS: 2 for sharpening, 1 for sharpening+denoising, 0 to disable - * ASF: Sharpening factor, higher numbers make a sharper underlying image - * ASP: Weight power, higher numbers use more of the sharp image + * AS: + * - 0 to disable + * - 1 to sharpen+denoise + * - 2 to sharpen only + * ASF: Higher numbers make a sharper image + * ASP: Higher numbers use more of the sharp image * ASW: * - 0 to use pre-WD weights * - 1 to use post-WD weights (ASP should be ~2x to compensate) @@ -396,15 +387,15 @@ vec4 hook() */ #ifdef LUMA_raw #define AS 0 -#define ASF 2.0 -#define ASP 4.0 +#define ASF 3.0 +#define ASP 1.0 #define ASW 0 #define ASK 1 #define ASC 0.0 #else #define AS 0 -#define ASF 2.0 -#define ASP 4.0 +#define ASF 3.0 +#define ASP 1.0 #define ASW 0 #define ASK 1 #define ASC 0.0 @@ -412,12 +403,10 @@ vec4 hook() /* Starting weight * - * Lower numbers give less weight to the pixel-of-interest, which may help - * handle higher noise levels, ringing, and may be useful for other things too? + * Also known as the center weight. This represents the weight of the + * pixel-of-interest. Lower numbers may help handle heavy noise & ringing. * - * EPSILON should be used instead of zero to avoid divide-by-zero errors. The - * avg_weight/old_avg_weight variables may be used to make SW adapt to the - * local noise level, e.g., SW=max(avg_weight, EPSILON) + * EPSILON should be used instead of zero to avoid divide-by-zero errors. */ #ifdef LUMA_raw #define SW 1.0 @@ -432,7 +421,7 @@ vec4 hook() * result, especially around edges. * * WD: - * - 2: True average. Very good quality, but slower and uses more memory. + * - 2: True average. Better quality, but slower and requires GLSL 4.0 or later * - 1: Moving cumulative average. Inaccurate, tends to blur directionally. * - 0: Disable * @@ -451,12 +440,14 @@ vec4 hook() /* Extremes preserve * - * Reduces denoising around very bright/dark areas. 
The downscaling factor of - * EP (located near the top of this shader) controls the area sampled for - * luminance (higher numbers consider more area). + * Reduces denoising around very bright/dark areas. + * + * The downscaling factor of the EP shader stage affects what is considered a + * bright/dark area. The default of 3 should be fine, it's not recommended to + * change this. * * This is incompatible with RGB. If you have RGB hooks enabled then you will - * have to delete the EP shader stage or specify EP=0 through nlmeans_cfg. + * have to delete the EP shader stage or specify EP=0 through shader_cfg. * * EP: 1 to enable, 0 to disable * DP: EP strength on dark patches, 0 to fully denoise @@ -478,25 +469,26 @@ vec4 hook() /* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ /* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ -/* Robust filtering +/* Patch & research sizes * - * This setting is dependent on code generation from nlmeans_cfg, so this - * setting can only be enabled via nlmeans_cfg. + * Patch size should be an odd number greater than or equal to 3. Higher values + * are slower and not always better. * - * Compares the pixel-of-interest against a guide, which could be a downscaled - * image or the output of another shader such as guided.glsl + * Research size should be an odd number greater than or equal to 3. Higher values are + * generally better, but slower, blurrier, and give diminishing returns. */ #ifdef LUMA_raw -#define RF 1 +#define P 3 +#define R 5 #else -#define RF 1 +#define P 3 +#define R 5 #endif -/* Search shape +/* Patch and research shapes * - * Determines the shape of patches and research zones. Different shapes have - * different speed and quality characteristics. Every shape (besides square) is - * smaller than square. + * Different shapes have different speed and quality characteristics. Every + * shape (besides square) is smaller than square. 
* * PS applies applies to patches, RS applies to research zones. * @@ -519,11 +511,22 @@ vec4 hook() #define PS 3 #endif +/* Robust filtering + * + * This setting is dependent on code generation from shader_cfg, so this + * setting can only be enabled via shader_cfg. + * + * Compares the pixel-of-interest against a guide, which could be a downscaled + * image or the output of another shader + */ +#define RF_LUMA 1 +#define RF 1 + /* Rotational/reflectional invariance * - * Number of rotations/reflections to try for each patch comparison. Slow, but - * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. The most similar rotation/reflection will be used. + * Number of rotations/reflections to try for each patch comparison. Can be + * slow, but improves feature preservation. More rotations/reflections gives + * diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -540,29 +543,39 @@ vec4 hook() #endif /* Temporal denoising + * + * This setting is dependent on code generation from shader_cfg, so this + * setting can only be enabled via shader_cfg. * * Caveats: - * - Slower, each frame needs to be researched - * - Requires vo=gpu-next and nlmeans_temporal.glsl + * - Slower: + * - Each frame needs to be researched (more samples & more math) + * - Gather optimizations only apply to the current frame + * - Requires vo=gpu-next * - Luma-only (this is a bug) * - Buggy * - * Gather samples across multiple frames. May cause motion blur and may - * struggle more with noise that persists across multiple frames (e.g., from - * compression or duplicate frames), but can work very well on high quality - * video. 
+ * May cause motion blur and may struggle more with noise that persists across + * multiple frames (e.g., from compression or duplicate frames), but can work + * very well on high quality video. * * Motion estimation (ME) should improve quality without impacting speed. * * T: number of frames used * ME: motion estimation, 0 for none, 1 for max weight, 2 for weighted avg + * MEF: estimate factor, compensates for ME being one frame behind + * TRF: compare against the denoised frames */ #ifdef LUMA_raw #define T 0 #define ME 1 +#define MEF 2 +#define TRF 0 #else #define T 0 #define ME 0 +#define MEF 2 +#define TRF 0 #endif /* Spatial kernel @@ -574,69 +587,79 @@ vec4 hook() * closer/further, for instance SD=(1,1,0.5) would make the temporal axis * appear closer and increase blur between frames. * - * The intra-patch variants do not yet have well-understood effects. They are - * intended to make large patch sizes more useful. Likely slower. + * The intra-patch variants are supposed to help with larger patch sizes. 
* - * SS: spatial denoising factor + * SST: enables spatial kernel if R>=PST, 0 fully disables + * SS: spatial sigma * SD: spatial distortion (X, Y, time) - * PSS: intra-patch spatial denoising factor + * PSS: intra-patch spatial sigma * PST: enables intra-patch spatial kernel if P>=PST, 0 fully disables * PSD: intra-patch spatial distortion (X, Y) */ #ifdef LUMA_raw +#define SST 1 #define SS 0.25 -#define SD vec3(1,1,1.5) +#define SD vec3(1,1,1) #define PST 0 #define PSS 0.0 #define PSD vec2(1,1) #else +#define SST 1 #define SS 0.25 -#define SD vec3(1,1,1.5) +#define SD vec3(1,1,1) #define PST 0 #define PSS 0.0 #define PSD vec2(1,1) #endif -// Scaling factor (should match WIDTH/HEIGHT) +/* Kernels + * + * SK: spatial kernel + * RK: range kernel (takes patch differences) + * PSK: intra-patch spatial kernel + * + * List of available kernels: + * + * bicubic + * cos + * gaussian + * lanczos + * quadratic + * sinc + * sphinx + */ #ifdef LUMA_raw -#define SF 1 +#define SK gaussian +#define RK gaussian +#define PSK gaussian #else -#define SF 1 +#define SK gaussian +#define RK gaussian +#define PSK gaussian #endif -/* Estimator - * - * 0: means - * 1: Euclidean medians (extremely slow, may be good for heavy noise) - * 2: weight map (not a denoiser, maybe useful for generating image masks) - * 3: weighted median intensity (slow, may be good for heavy noise) - * 4: edge map (based on the relevant AS settings) - */ +// Scaling factor (should match WIDTH/HEIGHT) #ifdef LUMA_raw -#define M 0 +#define SF 1 #else -#define M 0 +#define SF 1 #endif -/* Difference visualization - * - * Visualizes the difference between input/output image +/* Visualization * * 0: off - * 1: absolute difference scaled by S - * 2: difference centered on 0.5 + * 1: absolute difference between input/output to the power of 0.25 + * 2: difference between input/output centered on 0.5 + * 3: avg_weight + * 4: edge map (based on the relevant AS settings) */ #ifdef LUMA_raw -#define DV 0 +#define V 0 #else 
-#define DV 0 +#define V 0 #endif -/* Blur factor - * - * 0 to 1, only useful for alternative estimators. You're probably looking for - * "S" (denoising factor), go back to the top of the shader! - */ +// Blur factor (0.0 returns the input image, 1.0 returns the output image) #ifdef LUMA_raw #define BF 1.0 #else @@ -657,17 +680,57 @@ vec4 hook() #define PD 0 #endif -// Duplicate 1st weight (for LGC) +// Duplicate 1st weight (for luma-guided-chroma) #ifdef LUMA_raw #define D1W 0 #else #define D1W 0 #endif -/* Shader code */ +// Skip patch comparison +#ifdef LUMA_raw +#define SKIP_PATCH 0 +#else +#define SKIP_PATCH 0 +#endif + +// Shader code #define EPSILON 0.00000000001 #define M_PI 3.14159265358979323846 +#define POW2(x) ((x)*(x)) +#define POW3(x) ((x)*(x)*(x)) +#define bicubic(x) ((1.0/6.0) * (POW3((x)+2) - 4 * POW3((x)+1) + 6 * POW3(x) - 4 * POW3(max((x)-1, 0)))) +#define gaussian(x) exp(-1 * POW2(x)) +#define lanczos(x) POW2(sinc(x)) +#define quadratic(x) ((x) < 0.5 ? 0.75 - POW2(x) : 0.5 * POW2((x) - 1.5)) +#define sinc(x) ((x) < 1e-8 ? 1.0 : sin((x)*M_PI) / ((x)*M_PI)) +#define sphinx(x) ((x) < 1e-8 ? 
1.0 : 3.0 * (sin((x)*M_PI) - (x)*M_PI * cos((x)*M_PI)) / POW3((x)*M_PI)) + +// XXX could maybe be better optimized on LGC +// XXX return original alpha component instead of 1.0 +#if defined(LUMA_raw) +#define val float +#define val_swizz(v) (v.x) +#define unval(v) vec4(v.x, 0, 0, 1.0) +#define val_packed val +#define val_pack(v) (v) +#define val_unpack(v) (v) +#elif defined(CHROMA_raw) +#define val vec2 +#define val_swizz(v) (v.xy) +#define unval(v) vec4(v.x, v.y, 0, 1.0) +#define val_packed uint +#define val_pack(v) packUnorm2x16(v) +#define val_unpack(v) unpackUnorm2x16(v) +#else +#define val vec3 +#define val_swizz(v) (v.xyz) +#define unval(v) vec4(v.x, v.y, v.z, 1.0) +#define val_packed val +#define val_pack(v) (v) +#define val_unpack(v) (v) +#endif #if PS == 6 const int hp = P/2; @@ -682,39 +745,96 @@ const float hr = int(R/2) - 0.5*(1-(R%2)); // sample between pixels for even res #endif // donut increment, increments without landing on (0,0,0) -// much faster than a "continue" statement +// much faster than a continue statement #define DINCR(z,c) (z.c++,(z.c += int(z == vec3(0)))) -// search shapes and their corresponding areas -#define S_1X1(z) for (z = vec3(0); z.x <= 0; z.x++) +// patch/research shapes +// each shape is depicted in a comment, where Z=5 (Z corresponds to P or R) +// dots (.) represent samples (pixels) and X represents the pixel-of-interest + +// Z ..... +// Z ..... +// Z ..X.. +// Z ..... +// Z ..... +#define S_SQUARE(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz; z.y <= hz; incr) +// (in this instance Z=4) +// Z .... +// Z .... +// Z ..X. +// Z .... +#define S_SQUARE_EVEN(z,hz,incr) for (z.x = -hz; z.x < hz; z.x++) for (z.y = -hz; z.y < hz; incr) + +// Z-4 . +// Z-2 ... +// Z ..X.. #define S_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz); incr) + +// Z-4 . +// Z-2 ... 
+// hz+1 ..X #define S_TRUNC_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz)*int(z.y!=0); incr) #define S_TRIANGLE_A(hz,Z) int(hz*hz+Z) +// Z-4 . +// Z-2 ... +// Z ..X.. +// Z-2 ... +// Z-4 . #define S_DIAMOND(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -abs(abs(z.x) - hz); z.y <= abs(abs(z.x) - hz); incr) #define S_DIAMOND_A(hz,Z) int(hz*hz*2+Z) -#define S_VERTICAL(z,hz,incr) for (z.x = 0; z.x <= 0; z.x++) for (z.y = -hz; z.y <= hz; incr) +// +// Z ..X.. +// #define S_HORIZONTAL(z,hz,incr) for (z.x = -hz; z.x <= hz; incr) for (z.y = 0; z.y <= 0; z.y++) +// 90 degree rotation of S_HORIZONTAL +#define S_VERTICAL(z,hz,incr) for (z.x = 0; z.x <= 0; z.x++) for (z.y = -hz; z.y <= hz; incr) + +// 1 . +// 1 . +// Z ..X.. +// 1 . +// 1 . #define S_PLUS(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz * int(z.x == 0); z.y <= hz * int(z.x == 0); incr) #define S_PLUS_A(hz,Z) (Z*2 - 1) -#define S_SQUARE(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz; z.y <= hz; incr) -#define S_SQUARE_EVEN(z,hz,incr) for (z.x = -hz; z.x < hz; z.x++) for (z.y = -hz; z.y < hz; incr) +// XXX implement S_PLUS w/ an X overlayed: +// 3 . . . +// 3 ... +// Z ..X.. +// 3 ... +// 3 . . . + +// XXX implement an X shape: +// 2 . . +// 2 . . +// 1 X +// 2 . . +// 2 . . 
+ +// 1x1 square +#define S_1X1(z) for (z = vec3(0); z.x <= 0; z.x++) #define T1 (T+1) #define FOR_FRAME(r) for (r.z = 0; r.z < T1; r.z++) +#ifdef LUMA_raw +#define RF_ RF_LUMA +#else +#define RF_ RF +#endif + // Skip comparing the pixel-of-interest against itself, unless RF is enabled -#if RF +#if RF_ #define RINCR(z,c) (z.c++) #else #define RINCR DINCR #endif -#define R_AREA(a) (a * T1 + RF-1) +#define R_AREA(a) (a * T1 + RF_-1) // research shapes // XXX would be nice to have the option of temporally-varying research sizes @@ -803,44 +923,44 @@ const int p_area = P_AREA(P*P); const float r_scale = 1.0/r_area; const float p_scale = 1.0/p_area; -#define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define sample(tex, pos, size, pt, off) tex(pos + pt * (vec2(off) + 0.5 - fract(pos*size))) +#define load_(off) sample(HOOKED_tex, HOOKED_pos, HOOKED_size, HOOKED_pt, off) -#if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#if RF_ && defined(LUMA_raw) +#define load2_(off) sample(RF_LUMA_tex, RF_LUMA_pos, RF_LUMA_size, RF_LUMA_pt, off) #define gather_offs(off, off_arr) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, off_arr))) #define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off) * RF_LUMA_pt, 0) -#elif RF && D1W -#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) +#elif RF_ && D1W +#define load2_(off) sample(RF_tex, RF_pos, RF_size, RF_pt, off) #define gather_offs(off, off_arr) (RF_mul * vec4(textureGatherOffsets(RF_raw, RF_pos + vec2(off) * RF_pt, off_arr))) #define gather(off) RF_gather(RF_pos + (off) * RF_pt, 0) -#elif RF -#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) +#elif RF_ +#define load2_(off) sample(RF_tex, RF_pos, RF_size, RF_pt, off) #else -#define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) load_(off) #define gather_offs(off, off_arr) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + 
vec2(off) * HOOKED_pt, off_arr))) #define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T -vec4 load(vec3 off) +val load(vec3 off) { - switch (int(off.z)) { - case 0: return load_(off); + switch (min(int(off.z), frame)) { + case 0: return val_swizz(load_(off)); + } } -vec4 load2(vec3 off) +val load2(vec3 off) { - switch (int(off.z)) { - case 0: return load2_(off); - } + return off.z == 0 ? val_swizz(load2_(off)) : load(off); } #else -#define load(off) load_(off) -#define load2(off) load2_(off) +#define load(off) val_swizz(load_(off)) +#define load2(off) val_swizz(load2_(off)) #endif -vec4 poi = load(vec3(0)); // pixel-of-interest -vec4 poi2 = load2(vec3(0)); // guide pixel-of-interest +val poi = load(vec3(0)); // pixel-of-interest +val poi2 = load2(vec3(0)); // guide pixel-of-interest #if RI // rotation vec2 rot(vec2 p, float d) @@ -867,22 +987,52 @@ vec2 ref(vec2 p, int d) #define ref(p, d) (p) #endif -vec4 patch_comparison(vec3 r, vec3 r2) +#if SST && R >= SST +float spatial_r(vec3 v) +{ + v.xy += 0.5 - fract(HOOKED_pos*HOOKED_size); + return SK(length(v*SD)*SS); +} +#else +#define spatial_r(v) (1) +#endif + +#if PST && P >= PST +#define spatial_p(v) PSK(length(v*PSD)*PSS) +#else +#define spatial_p(v) (1) +#endif + +val range(val pdiff_sq) +{ + const float h = S*0.013; + const float pdiff_scale = 1.0/(h*h); + pdiff_sq = sqrt(pdiff_sq * pdiff_scale); +#if defined(LUMA_raw) + return RK(pdiff_sq); +#elif defined(CHROMA_raw) + return vec2(RK(pdiff_sq.x), RK(pdiff_sq.y)); +#else + return vec3(RK(pdiff_sq.x), RK(pdiff_sq.y), RK(pdiff_sq.z)); +#endif + //return exp(-pdiff_sq * pdiff_scale); + + // weight function from the NLM paper, it's not very good + //return exp(-max(pdiff_sq - 2*S*S, 0.0) * pdiff_scale); +} + +val patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 min_rot = vec4(p_area); + val min_rot = val(p_area); FOR_ROTATION FOR_REFLECTION { - vec4 pdiff_sq = vec4(0); + val pdiff_sq = val(0); FOR_PATCH(p) { vec3 transformed_p = 
vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = load2(p + r2) - load2((transformed_p + r) * SF); + val diff_sq = load2(p + r2) - load2((transformed_p + r) * SF); diff_sq *= diff_sq; -#if PST && P >= PST - float pdist = length(p.xy*PSD)*PSS; - pdist = exp(-(pdist*pdist)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); -#endif + diff_sq = 1 - (1 - diff_sq) * spatial_p(p.xy); pdiff_sq += diff_sq; } min_rot = min(min_rot, pdiff_sq); @@ -894,14 +1044,15 @@ vec4 patch_comparison(vec3 r, vec3 r2) #define NO_GATHER (PD == 0 && NG == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#if (defined(LUMA_gather) || D1W) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if (defined(LUMA_gather) || D1W) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather // XXX extend to support arbitrary sizes (probably requires code generation) // XXX extend to support 3x3 square +// XXX support PSS const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; const ivec2 offsets_sf[4] = { ivec2(0,-1) * SF, ivec2(-1,0) * SF, ivec2(0,1) * SF, ivec2(1,0) * SF }; vec4 poi_patch = gather_offs(0, offsets); -vec4 patch_comparison_gather(vec3 r, vec3 r2) +float patch_comparison_gather(vec3 r, vec3 r2) { float min_rot = p_area - 1; vec4 transformer = gather_offs(r, offsets_sf); @@ -925,13 +1076,12 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) } float center_diff_sq = poi2.x - load2(r).x; center_diff_sq *= center_diff_sq; - return vec4(min_rot + center_diff_sq, 0, 0, 0) * p_scale; + return (min_rot + center_diff_sq) * p_scale; } -#elif (defined(LUMA_gather) || D1W) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER +#elif (defined(LUMA_gather) || D1W) && PS == 6 && RI == 0 && RFI == 0 && NO_GATHER // tiled even square patch_comparison_gather // XXX extend to support odd square? 
-// XXX rotations/reflections appear to be subtly broken -vec4 patch_comparison_gather(vec3 r, vec3 r2) +float patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; float min_rot = p_area; @@ -940,40 +1090,17 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) * w z * x y */ - FOR_ROTATION FOR_REFLECTION { - float pdiff_sq = 0; - for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { - vec4 poi_patch = gather(tile + r2.xy); - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - -#if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees -#endif -#if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } -#endif - - vec4 diff_sq = (poi_patch - transformer) * (poi_patch - transformer); -#if PST && P >= PST - // XXX refactor to avoid pow (should probably break off into a function) - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq += dot(diff_sq, vec4(1)); - } - min_rot = min(min_rot, pdiff_sq); + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { + vec4 diff_sq = gather(tile + r.xy) - gather(tile + r2.xy); + diff_sq *= diff_sq; + diff_sq = 1 - (1 - diff_sq) * vec4(spatial_p(tile+vec2(0,1)), spatial_p(tile+vec2(1,1)), + spatial_p(tile+vec2(1,0)), spatial_p(tile+vec2(0,0))); + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); - return vec4(min_rot, 0, 0, 0) * p_scale; + return min_rot * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -981,9 +1108,9 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) vec4 hook() { - vec4 total_weight = vec4(0); - vec4 sum 
= vec4(0); - vec4 result = vec4(0); + val total_weight = val(0); + val sum = val(0); + val result = val(0); vec3 r = vec3(0); vec3 p = vec3(0); @@ -997,41 +1124,38 @@ vec4 hook() float me_weight = 0; #endif -#if WD == 2 || M == 3 // weight discard, weighted median intensities +#if WD == 2 // weight discard int r_index = 0; - vec4 all_weights[r_area]; - vec4 all_pixels[r_area]; + val_packed all_weights[r_area]; + val_packed all_pixels[r_area]; #elif WD == 1 // weight discard - vec4 no_weights = vec4(0); - vec4 discard_total_weight = vec4(0); - vec4 discard_sum = vec4(0); -#endif - -#if M == 1 // Euclidean medians - vec4 minsum = vec4(0); + val no_weights = val(0); + val discard_total_weight = val(0); + val discard_sum = val(0); #endif FOR_FRAME(r) { // XXX ME is always a frame behind, should have to option to re-research after applying ME (could do it an arbitrary number of times per frame if desired) #if T && ME == 1 // temporal & motion estimation max weight if (r.z > 0) { - me += me_tmp; + me += me_tmp * MEF; me_tmp = vec3(0); maxweight = 0; } #elif T && ME == 2 // temporal & motion estimation weighted average if (r.z > 0) { - me += round(me_sum / me_weight); + me += round(me_sum / me_weight * MEF); me_sum = vec3(0); me_weight = 0; } #endif - FOR_RESEARCH(r) { - // main NLM logic - const float h = S*0.013; - const float pdiff_scale = 1.0/(h*h); - vec4 pdiff_sq = (r.z == 0) ? patch_comparison_gather(r+me, vec3(0)) : patch_comparison(r+me, vec3(0)); - vec4 weight = exp(-pdiff_sq * pdiff_scale); + FOR_RESEARCH(r) { // main NLM logic +#if SKIP_PATCH + val weight = val(1); +#else + val pdiff_sq = (r.z == 0) ? 
val(patch_comparison_gather(r+me, vec3(0))) : patch_comparison(r+me, vec3(0)); + val weight = range(pdiff_sq); +#endif #if T && ME == 1 // temporal & motion estimation max weight me_tmp = vec3(r.xy,0) * step(maxweight, weight.x) + me_tmp * (1 - step(maxweight, weight.x)); @@ -1042,18 +1166,18 @@ vec4 hook() #endif #if D1W - weight = vec4(weight.x); + weight = val(weight.x); #endif - weight *= exp(-(length(r*SD)*SS * length(r*SD)*SS)); // spatial kernel + weight *= spatial_r(r); -#if WD == 2 || M == 3 // weight discard, weighted median intensity - all_weights[r_index] = weight; - all_pixels[r_index] = load(r+me); +#if WD == 2 // weight discard + all_weights[r_index] = val_pack(weight); + all_pixels[r_index] = val_pack(load(r+me)); r_index++; #elif WD == 1 // weight discard - vec4 wd_scale = 1.0/max(no_weights, 1); - vec4 keeps = step(total_weight*wd_scale * WDT*exp(-wd_scale*WDP), weight); + val wd_scale = 1.0/max(no_weights, 1); + val keeps = step(total_weight*wd_scale * WDT*exp(-wd_scale*WDP), weight); discard_sum += load(r+me) * weight * (1 - keeps); discard_total_weight += weight * (1 - keeps); no_weights += keeps; @@ -1061,45 +1185,25 @@ vec4 hook() sum += load(r+me) * weight; total_weight += weight; - -#if M == 1 // Euclidean median - // Based on: https://arxiv.org/abs/1207.3056 - // XXX might not work with ME - vec3 r2; - vec4 wpdist_sum = vec4(0); - FOR_FRAME(r2) FOR_RESEARCH(r2) { - vec4 pdist = (r.z + r2.z) == 0 ? patch_comparison_gather(r+me, r2+me) : patch_comparison(r+me, r2+me); - wpdist_sum += sqrt(pdist) * (1-weight); - } - - vec4 newmin = step(wpdist_sum, minsum); // wpdist_sum <= minsum - newmin *= 1 - step(wpdist_sum, vec4(0)); // && wpdist_sum > 0 - newmin += step(minsum, vec4(0)); // || minsum <= 0 - newmin = min(newmin, 1); - - minsum = (newmin * wpdist_sum) + ((1-newmin) * minsum); - result = (newmin * load(r+me)) + ((1-newmin) * result); -#endif } // FOR_RESEARCH } // FOR_FRAME - // XXX optionally put the denoised pixel into the frame buffer? 
-#if T // temporal -#endif - - vec4 avg_weight = total_weight * r_scale; - vec4 old_avg_weight = avg_weight; + val avg_weight = total_weight * r_scale; + val old_avg_weight = avg_weight; #if WD == 2 // true average - total_weight = vec4(0); - sum = vec4(0); - vec4 no_weights = vec4(0); + total_weight = val(0); + sum = val(0); + val no_weights = val(0); for (int i = 0; i < r_area; i++) { - vec4 keeps = step(avg_weight*WDT, all_weights[i]); - all_weights[i] *= keeps; - sum += all_pixels[i] * all_weights[i]; - total_weight += all_weights[i]; + val w = val_unpack(all_weights[i]); + val px = val_unpack(all_pixels[i]); + val keeps = step(avg_weight*WDT, w); + + w *= keeps; + sum += px * w; + total_weight += w; no_weights += keeps; } #elif WD == 1 // moving cumulative average @@ -1110,29 +1214,23 @@ vec4 hook() avg_weight = total_weight / no_weights; #endif - total_weight += SW; - sum += poi * SW; + total_weight += SW * spatial_r(vec3(0)); + sum += poi * SW * spatial_r(vec3(0)); -#if M == 3 // weighted median intensity - const float hr_area = r_area/2.0; - vec4 is_median, gt, lt, gte, lte, neq; +#if V == 3 // weight map + result = val(avg_weight); +#else // mean + result = val(sum / total_weight); +#endif - for (int i = 0; i < r_area; i++) { - gt = lt = vec4(0); - for (int j = 0; j < r_area; j++) { - gte = step(all_pixels[i]*all_weights[i], all_pixels[j]*all_weights[j]); - lte = step(all_pixels[j]*all_weights[j], all_pixels[i]*all_weights[i]); - neq = 1 - gte * lte; - gt += gte * neq; - lt += lte * neq; - } - is_median = step(gt, vec4(hr_area)) * step(lt, vec4(hr_area)); - result += step(result, vec4(0)) * is_median * all_pixels[i]; - } -#elif M == 2 // weight map - result = avg_weight; -#elif M == 0 // mean - result = sum / total_weight; + // store frames for temporal +#if T > 1 + +#endif +#if T && TRF + imageStore(PREV1, ivec2(HOOKED_pos*imageSize(PREV1)), unval(result)); +#elif T + imageStore(PREV1, ivec2(HOOKED_pos*imageSize(PREV1)), unval(poi2)); #endif #if ASW == 0 
// pre-WD weights @@ -1142,22 +1240,20 @@ vec4 hook() #endif #if ASK == 0 - vec4 sharpening_strength = pow(AS_weight, vec4(ASP)); + val sharpening_strength = pow(AS_weight, val(ASP)); #elif ASK == 1 -#define sigmoid(x) (tanh(x * 2*M_PI - M_PI)*0.5+0.5) - vec4 sharpening_strength = mix(pow(sigmoid(AS_weight), vec4(ASP)), - AS_weight, ASC); - // just in case ASC < 0 (will sharpen but it's janky XXX) - sharpening_strength = clamp(sharpening_strength, 0.0, 1.0); + val sharpening_strength = mix( + pow(smoothstep(0.0, 1.0, AS_weight), val(ASP)), + AS_weight, ASC); + // XXX normalize the result to account for a negative ASC? #elif ASK == 2 - vec4 sharpening_strength = vec4(ASP); + val sharpening_strength = val(ASP); #endif - // XXX maybe allow for alternative blurs? e.g., replace result w/ load2? #if AS == 1 // sharpen+denoise - vec4 sharpened = result + (poi - result) * ASF; + val sharpened = result + (poi - result) * ASF; #elif AS == 2 // sharpen only - vec4 sharpened = poi + (poi - result) * ASF; + val sharpened = poi + (poi - result) * ASF; #endif #if EP // extremes preserve @@ -1173,20 +1269,20 @@ vec4 hook() result = mix(sharpened, poi, sharpening_strength); #endif -#if M == 4 // edge map +#if V == 4 // edge map result = sharpening_strength; #endif -#if (M == 2 || M == 4) && defined(CHROMA_raw) // drop chroma for weight maps - result = vec4(0.5); +#if (V == 3 || V == 4) && defined(CHROMA_raw) // drop chroma for these visualizations + return vec4(0.5); #endif -#if DV == 1 - result = clamp(abs(poi - result) * S, 0.0, 1.0); -#elif DV == 2 +#if V == 1 + result = clamp(pow(abs(poi - result), val(0.25)), 0.0, 1.0); +#elif V == 2 result = (poi - result) * 0.5 + 0.5; #endif - return mix(poi, result, BF); + return unval(mix(poi, result, BF)); } diff --git a/portable_config/shaders/nlmeans_2x.glsl b/portable_config/shaders/nlmeans_2x.glsl new file mode 100644 index 00000000..737f245e --- /dev/null +++ b/portable_config/shaders/nlmeans_2x.glsl @@ -0,0 +1,1247 @@ +/* vi: ft=c + 
* + * Based on vf_nlmeans.c from FFmpeg. + * + * Copyright (c) 2022 an3223 + * Copyright (c) 2016 Clément Bœsch + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 2.1 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + */ + +// Description: nlmeans_2x.glsl: Experimental upscaler + +/* The recommended usage of this shader and its variant profiles is to add them + * to input.conf and then dispatch the appropriate shader via a keybind during + * media playback. Here is an example input.conf entry: + * + * F4 no-osd change-list glsl-shaders toggle "~~/shaders/nlmeans_luma.glsl"; show-text "Non-local means (LUMA only)" + * + * These shaders can also be enabled by default in mpv.conf, for example: + * + * glsl-shaders='~~/shaders/nlmeans.glsl' + * + * Both of the examples above assume the shaders are located in a subdirectory + * named "shaders" within mpv's config directory. Refer to the mpv + * documentation for more details. + * + * This shader is highly configurable via user variables below. Although the + * default settings should offer good quality at a reasonable speed, you are + * encouraged to tweak them to your preferences. Be mindful that certain + * settings may greatly affect speed. + * + * Denoising is most useful for noisy content. If there is no perceptible + * noise, you probably won't see a positive difference. + * + * The default settings are generally tuned for low noise and high detail + * preservation. 
The "medium" and "heavy" profiles are tuned for higher levels + * of noise. + * + * The denoiser will not work properly if the content has been upscaled + * beforehand (whether it was done by you or not). In such cases, consider + * issuing a command to downscale in the mpv console (backtick ` key): + * + * vf toggle scale=-2:720 + * + * ...replacing 720 with whatever resolution seems appropriate. Rerun the + * command to undo the downscale. It may take some trial-and-error to find the + * proper resolution. + */ + +/* Regarding speed + * + * Speed may vary wildly for different vo and gpu-api settings. Generally + * vo=gpu-next and gpu-api=vulkan are recommended for the best speed, but this + * may be different for your system. + * + * If your GPU doesn't support textureGather, or if you are on a version of mpv + * prior to 0.35.0, then consider setting RI/RFI to 0, or try the LQ profile + * + * If you plan on tinkering with NLM's settings, read below: + * + * textureGather only applies to luma and limited to the these configurations: + * + * - PS={3,7}:P=3:PST=0:RI={0,1,3}:RFI={0,1,2} + * - Default, very fast, rotations and reflections should be free + * - If this is unusually slow then try changing gpu-api and vo + * - If it's still slow, try setting RI/RFI to 0. + * + * - PS=6:RI={0,1,3}:RFI={0,1,2} + * - Currently the only scalable variant + * - Patch shape is asymmetric on two axis + * - Rotations should have very little speed impact + * - Reflections may have a significant speed impact + * + * Options which always disable textureGather: + * - PD + * - NG + */ + +// The following is shader code injected from guided.glsl +/* vi: ft=c + * + * Copyright (c) 2022 an3223 + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 2.1 of the License, or (at + * your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . + */ + +// Description: guided.glsl: Guided by the downscaled image + +/* The radius can be adjusted with the MEANI stage's downscaling factor. + * Higher numbers give a bigger radius. + * + * The E variable can be found in the A stage. + * + * The subsampling (fast guided filter) can be adjusted with the I stage's + * downscaling factor. Higher numbers are faster. + * + * The guide's subsampling can be adjusted with the PREI stage's downscaling + * factor. Higher numbers downscale more. + */ + +//!HOOK LUMA +//!BIND HOOKED +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / +//!DESC Guided filter (PREI) +//!SAVE _INJ_PREI + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!BIND _INJ_PREI +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h +//!DESC Guided filter (I) +//!SAVE _INJ_I + +vec4 hook() +{ +return _INJ_PREI_texOff(0); +} + + +//!HOOK LUMA +//!DESC Guided filter (P) +//!BIND HOOKED +//!WIDTH _INJ_I.w +//!HEIGHT _INJ_I.h +//!SAVE _INJ_P + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!DESC Guided filter (MEANI) +//!BIND _INJ_I +//!WIDTH _INJ_I.w 1.5 / +//!HEIGHT _INJ_I.h 1.5 / +//!SAVE _INJ_MEANI + +vec4 hook() +{ +return _INJ_I_texOff(0); +} + +//!HOOK LUMA +//!DESC Guided filter (MEANP) +//!BIND _INJ_P +//!WIDTH _INJ_MEANI.w +//!HEIGHT _INJ_MEANI.h +//!SAVE _INJ_MEANP + +vec4 hook() +{ +return _INJ_P_texOff(0); +} + +//!HOOK LUMA +//!DESC Guided filter (_INJ_I_SQ) +//!BIND _INJ_I +//!WIDTH _INJ_I.w +//!HEIGHT _INJ_I.h +//!SAVE _INJ_I_SQ + +vec4 hook() +{ +return _INJ_I_texOff(0) * _INJ_I_texOff(0); +} + +//!HOOK LUMA +//!DESC Guided filter (_INJ_IXP) 
+//!BIND _INJ_I +//!BIND _INJ_P +//!WIDTH _INJ_I.w +//!HEIGHT _INJ_I.h +//!SAVE _INJ_IXP + +vec4 hook() +{ +return _INJ_I_texOff(0) * _INJ_P_texOff(0); +} + +//!HOOK LUMA +//!DESC Guided filter (CORRI) +//!BIND _INJ_I_SQ +//!WIDTH _INJ_MEANI.w +//!HEIGHT _INJ_MEANI.h +//!SAVE _INJ_CORRI + +vec4 hook() +{ +return _INJ_I_SQ_texOff(0); +} + +//!HOOK LUMA +//!DESC Guided filter (CORRP) +//!BIND _INJ_IXP +//!WIDTH _INJ_MEANI.w +//!HEIGHT _INJ_MEANI.h +//!SAVE _INJ_CORRP + +vec4 hook() +{ +return _INJ_IXP_texOff(0); +} + +//!HOOK LUMA +//!DESC Guided filter (A) +//!BIND _INJ_MEANI +//!BIND _INJ_MEANP +//!BIND _INJ_CORRI +//!BIND _INJ_CORRP +//!WIDTH _INJ_I.w +//!HEIGHT _INJ_I.h +//!SAVE _INJ_A + +#define E 0.0013 + +vec4 hook() +{ +vec4 var = _INJ_CORRI_texOff(0) - _INJ_MEANI_texOff(0) * _INJ_MEANI_texOff(0); +vec4 cov = _INJ_CORRP_texOff(0) - _INJ_MEANI_texOff(0) * _INJ_MEANP_texOff(0); + return cov / (var + E); +} + +//!HOOK LUMA +//!DESC Guided filter (B) +//!BIND _INJ_A +//!BIND _INJ_MEANI +//!BIND _INJ_MEANP +//!WIDTH _INJ_I.w +//!HEIGHT _INJ_I.h +//!SAVE _INJ_B + +vec4 hook() +{ +return _INJ_MEANP_texOff(0) - _INJ_A_texOff(0) * _INJ_MEANI_texOff(0); +} + +//!HOOK LUMA +//!DESC Guided filter (MEANA) +//!BIND _INJ_A +//!WIDTH _INJ_MEANI.w +//!HEIGHT _INJ_MEANI.h +//!SAVE _INJ_MEANA + +vec4 hook() +{ +return _INJ_A_texOff(0); +} + +//!HOOK LUMA +//!DESC Guided filter (MEANB) +//!BIND _INJ_B +//!WIDTH _INJ_MEANI.w +//!HEIGHT _INJ_MEANI.h +//!SAVE _INJ_MEANB + +vec4 hook() +{ +return _INJ_B_texOff(0); +} + +//!HOOK LUMA +//!DESC Guided filter +//!BIND HOOKED +//!BIND _INJ_MEANA +//!BIND _INJ_MEANB +//!SAVE RF_LUMA + +vec4 hook() +{ +return _INJ_MEANA_texOff(0) * HOOKED_texOff(0) + _INJ_MEANB_texOff(0); +} + +// End of source code injected from guided.glsl + +//!HOOK LUMA +//!BIND HOOKED +//!BIND RF_LUMA +//!DESC Non-local means (nlmeans_2x.glsl) +//!WIDTH HOOKED.w 2 * +//!HEIGHT HOOKED.h 2 * + +// User variables + +// It is generally preferable to denoise luma and 
chroma differently, so the +// user variables for luma and chroma are split. + +// Denoising factor (level of blur, higher means more blur) +#ifdef LUMA_raw +#define S 12.8125 +#else +#define S 12.8125 +#endif + +/* Adaptive sharpening + * + * Uses the blur incurred by denoising to perform an unsharp mask, and uses the + * weight map to restrict the sharpening to edges. + * + * If you just want to increase/decrease sharpness then you want to change ASF. + * + * Use V=4 to visualize which areas are sharpened (black means sharpen). + * + * AS: + * - 0 to disable + * - 1 to sharpen+denoise + * - 2 to sharpen only + * ASF: Higher numbers make a sharper image + * ASP: Higher numbers use more of the sharp image + * ASW: + * - 0 to use pre-WD weights + * - 1 to use post-WD weights (ASP should be ~2x to compensate) + * ASK: Weight kernel: + * - 0 for power. This is the old method. + * - 1 for sigmoid. This is generally recommended. + * - 2 for constant (non-adaptive, w/ ASP=0 this sharpens the entire image) + * ASC (only for ASK=1, range 0-1): Reduces the contrast of the edge map + */ +#ifdef LUMA_raw +#define AS 0 +#define ASF 3.0 +#define ASP 1.0 +#define ASW 0 +#define ASK 1 +#define ASC 0.0 +#else +#define AS 0 +#define ASF 3.0 +#define ASP 1.0 +#define ASW 0 +#define ASK 1 +#define ASC 0.0 +#endif + +/* Starting weight + * + * Also known as the center weight. This represents the weight of the + * pixel-of-interest. Lower numbers may help handle heavy noise & ringing. + * + * EPSILON should be used instead of zero to avoid divide-by-zero errors. + */ +#ifdef LUMA_raw +#define SW 0.14876 +#else +#define SW 0.14876 +#endif + +/* Weight discard + * + * Discard weights that fall below a fraction of the average weight. This culls + * the most dissimilar samples from the blur, yielding a much more pleasant + * result, especially around edges. + * + * WD: + * - 2: True average. Better quality, but slower and requires GLSL 4.0 or later + * - 1: Moving cumulative average. 
Inaccurate, tends to blur directionally. + * - 0: Disable + * + * WDT: Threshold coefficient, higher numbers discard more + * WDP (only for WD=1): Increasing reduces the threshold for small sample sizes + */ +#ifdef LUMA_raw +#define WD 2 +#define WDT 0.63888239592 +#define WDP 6.0 +#else +#define WD 2 +#define WDT 0.63888239592 +#define WDP 6.0 +#endif + +/* Extremes preserve + * + * Reduces denoising around very bright/dark areas. + * + * The downscaling factor of the EP shader stage affects what is considered a + * bright/dark area. The default of 3 should be fine, it's not recommended to + * change this. + * + * This is incompatible with RGB. If you have RGB hooks enabled then you will + * have to delete the EP shader stage or specify EP=0 through shader_cfg. + * + * EP: 1 to enable, 0 to disable + * DP: EP strength on dark patches, 0 to fully denoise + * BP: EP strength on bright patches, 0 to fully denoise + */ +#ifdef LUMA_raw +#define EP 0 +#define BP 0.75 +#define DP 0.25 +#else +#define EP 0 +#define BP 0.0 +#define DP 0.0 +#endif + +/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ +/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ +/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ +/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ +/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ + +/* Patch & research sizes + * + * Patch size should be an odd number greater than or equal to 3. Higher values + * are slower and not always better. + * + * Research size should be an odd number greater than or equal to 3. Higher values are + * generally better, but slower, blurrier, and give diminishing returns. + */ +#ifdef LUMA_raw +#define P 3 +#define R 5 +#else +#define P 3 +#define R 5 +#endif + +/* Patch and research shapes + * + * Different shapes have different speed and quality characteristics. 
Every + * shape (besides square) is smaller than square. + * + * PS applies to patches, RS applies to research zones. + * + * Be wary of gather optimizations (see the Regarding Speed comment at the top) + * + * 0: square (symmetrical) + * 1: horizontal line (asymmetric) + * 2: vertical line (asymmetric) + * 3: diamond (symmetrical) + * 4: triangle (asymmetric, pointing upward) + * 5: truncated triangle (asymmetric on two axes, last row halved) + * 6: even sized square (asymmetric on two axes) + * 7: plus (symmetrical) + */ +#ifdef LUMA_raw +#define RS 3 +#define PS 3 +#else +#define RS 3 +#define PS 3 +#endif + +/* Robust filtering + * + * This setting is dependent on code generation from shader_cfg, so this + * setting can only be enabled via shader_cfg. + * + * Compares the pixel-of-interest against a guide, which could be a downscaled + * image or the output of another shader + */ +#define RF_LUMA 1 +#define RF 0 + +/* Rotational/reflectional invariance + * + * Number of rotations/reflections to try for each patch comparison. Can be + * slow, but improves feature preservation. More rotations/reflections give + * diminishing returns. The most similar rotation/reflection will be used. + * + * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a + * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. + * + * RI: Rotational invariance + * RFI (0 to 2): Reflectional invariance + */ +#ifdef LUMA_raw +#define RI 3 +#define RFI 2 +#else +#define RI 0 +#define RFI 0 +#endif + +/* Temporal denoising + * + * This setting is dependent on code generation from shader_cfg, so this + * setting can only be enabled via shader_cfg. 
+ * + * Caveats: + * - Slower: + * - Each frame needs to be researched (more samples & more math) + * - Gather optimizations only apply to the current frame + * - Requires vo=gpu-next + * - Luma-only (this is a bug) + * - Buggy + * + * May cause motion blur and may struggle more with noise that persists across + * multiple frames (e.g., from compression or duplicate frames), but can work + * very well on high quality video. + * + * Motion estimation (ME) should improve quality without impacting speed. + * + * T: number of frames used + * ME: motion estimation, 0 for none, 1 for max weight, 2 for weighted avg + * MEF: estimate factor, compensates for ME being one frame behind + * TRF: compare against the denoised frames + */ +#ifdef LUMA_raw +#define T 0 +#define ME 1 +#define MEF 2 +#define TRF 0 +#else +#define T 0 +#define ME 0 +#define MEF 2 +#define TRF 0 +#endif + +/* Spatial kernel + * + * Increasing the spatial denoising factor (SS) reduces the weight of further + * pixels. + * + * Spatial distortion instructs the spatial kernel to view that axis as + * closer/further, for instance SD=(1,1,0.5) would make the temporal axis + * appear closer and increase blur between frames. + * + * The intra-patch variants are supposed to help with larger patch sizes. 
+ * + * SST: enables spatial kernel if R>=SST, 0 fully disables + * SS: spatial sigma + * SD: spatial distortion (X, Y, time) + * PSS: intra-patch spatial sigma + * PST: enables intra-patch spatial kernel if P>=PST, 0 fully disables + * PSD: intra-patch spatial distortion (X, Y) + */ +#ifdef LUMA_raw +#define SST 1 +#define SS 0.5547703803256947 +#define SD vec3(1,1,1) +#define PST 0 +#define PSS 0.0 +#define PSD vec2(1,1) +#else +#define SST 1 +#define SS 0.5547703803256947 +#define SD vec3(1,1,1) +#define PST 0 +#define PSS 0.0 +#define PSD vec2(1,1) +#endif + +/* Kernels + * + * SK: spatial kernel + * RK: range kernel (takes patch differences) + * PSK: intra-patch spatial kernel + * + * List of available kernels: + * + * bicubic + * cos + * gaussian + * lanczos + * quadratic + * sinc + * sphinx + */ +#ifdef LUMA_raw +#define SK lanczos +#define RK gaussian +#define PSK gaussian +#else +#define SK lanczos +#define RK gaussian +#define PSK gaussian +#endif + +// Scaling factor (should match WIDTH/HEIGHT) +#ifdef LUMA_raw +#define SF 1 +#else +#define SF 1 +#endif + +/* Visualization + * + * 0: off + * 1: absolute difference between input/output to the power of 0.25 + * 2: difference between input/output centered on 0.5 + * 3: avg_weight + * 4: edge map (based on the relevant AS settings) + */ +#ifdef LUMA_raw +#define V 0 +#else +#define V 0 +#endif + +// Blur factor (0.0 returns the input image, 1.0 returns the output image) +#ifdef LUMA_raw +#define BF 1.0 +#else +#define BF 1.0 +#endif + +// Force disable textureGather +#ifdef LUMA_raw +#define NG 0 +#else +#define NG 0 +#endif + +// Patch donut (probably useless) +#ifdef LUMA_raw +#define PD 0 +#else +#define PD 0 +#endif + +// Duplicate 1st weight (for luma-guided-chroma) +#ifdef LUMA_raw +#define D1W 0 +#else +#define D1W 0 +#endif + +// Skip patch comparison +#ifdef LUMA_raw +#define SKIP_PATCH 0 +#else +#define SKIP_PATCH 0 +#endif + +// Shader code + +#define EPSILON 0.00000000001 +#define M_PI 
3.14159265358979323846 +#define POW2(x) ((x)*(x)) +#define POW3(x) ((x)*(x)*(x)) +#define bicubic(x) ((1.0/6.0) * (POW3((x)+2) - 4 * POW3((x)+1) + 6 * POW3(x) - 4 * POW3(max((x)-1, 0)))) +#define gaussian(x) exp(-1 * POW2(x)) +#define lanczos(x) POW2(sinc(x)) +#define quadratic(x) ((x) < 0.5 ? 0.75 - POW2(x) : 0.5 * POW2((x) - 1.5)) +#define sinc(x) ((x) < 1e-8 ? 1.0 : sin((x)*M_PI) / ((x)*M_PI)) +#define sphinx(x) ((x) < 1e-8 ? 1.0 : 3.0 * (sin((x)*M_PI) - (x)*M_PI * cos((x)*M_PI)) / POW3((x)*M_PI)) + +// XXX could maybe be better optimized on LGC +// XXX return original alpha component instead of 1.0 +#if defined(LUMA_raw) +#define val float +#define val_swizz(v) (v.x) +#define unval(v) vec4(v.x, 0, 0, 1.0) +#define val_packed val +#define val_pack(v) (v) +#define val_unpack(v) (v) +#elif defined(CHROMA_raw) +#define val vec2 +#define val_swizz(v) (v.xy) +#define unval(v) vec4(v.x, v.y, 0, 1.0) +#define val_packed uint +#define val_pack(v) packUnorm2x16(v) +#define val_unpack(v) unpackUnorm2x16(v) +#else +#define val vec3 +#define val_swizz(v) (v.xyz) +#define unval(v) vec4(v.x, v.y, v.z, 1.0) +#define val_packed val +#define val_pack(v) (v) +#define val_unpack(v) (v) +#endif + +#if PS == 6 +const int hp = P/2; +#else +const float hp = int(P/2) - 0.5*(1-(P%2)); // sample between pixels for even patch sizes +#endif + +#if RS == 6 +const int hr = R/2; +#else +const float hr = int(R/2) - 0.5*(1-(R%2)); // sample between pixels for even research sizes +#endif + +// donut increment, increments without landing on (0,0,0) +// much faster than a continue statement +#define DINCR(z,c) (z.c++,(z.c += int(z == vec3(0)))) + +// patch/research shapes +// each shape is depicted in a comment, where Z=5 (Z corresponds to P or R) +// dots (.) represent samples (pixels) and X represents the pixel-of-interest + +// Z ..... +// Z ..... +// Z ..X.. +// Z ..... +// Z ..... 
+#define S_SQUARE(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz; z.y <= hz; incr) + +// (in this instance Z=4) +// Z .... +// Z .... +// Z ..X. +// Z .... +#define S_SQUARE_EVEN(z,hz,incr) for (z.x = -hz; z.x < hz; z.x++) for (z.y = -hz; z.y < hz; incr) + +// Z-4 . +// Z-2 ... +// Z ..X.. +#define S_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz); incr) + +// Z-4 . +// Z-2 ... +// hz+1 ..X +#define S_TRUNC_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz)*int(z.y!=0); incr) +#define S_TRIANGLE_A(hz,Z) int(hz*hz+Z) + +// Z-4 . +// Z-2 ... +// Z ..X.. +// Z-2 ... +// Z-4 . +#define S_DIAMOND(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -abs(abs(z.x) - hz); z.y <= abs(abs(z.x) - hz); incr) +#define S_DIAMOND_A(hz,Z) int(hz*hz*2+Z) + +// +// Z ..X.. +// +#define S_HORIZONTAL(z,hz,incr) for (z.x = -hz; z.x <= hz; incr) for (z.y = 0; z.y <= 0; z.y++) + +// 90 degree rotation of S_HORIZONTAL +#define S_VERTICAL(z,hz,incr) for (z.x = 0; z.x <= 0; z.x++) for (z.y = -hz; z.y <= hz; incr) + +// 1 . +// 1 . +// Z ..X.. +// 1 . +// 1 . +#define S_PLUS(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz * int(z.x == 0); z.y <= hz * int(z.x == 0); incr) +#define S_PLUS_A(hz,Z) (Z*2 - 1) + +// XXX implement S_PLUS w/ an X overlayed: +// 3 . . . +// 3 ... +// Z ..X.. +// 3 ... +// 3 . . . + +// XXX implement an X shape: +// 2 . . +// 2 . . +// 1 X +// 2 . . +// 2 . . 
+ +// 1x1 square +#define S_1X1(z) for (z = vec3(0); z.x <= 0; z.x++) + +#define T1 (T+1) +#define FOR_FRAME(r) for (r.z = 0; r.z < T1; r.z++) + +#ifdef LUMA_raw +#define RF_ RF_LUMA +#else +#define RF_ RF +#endif + +// Skip comparing the pixel-of-interest against itself, unless RF is enabled +#if RF_ +#define RINCR(z,c) (z.c++) +#else +#define RINCR DINCR +#endif + +#define R_AREA(a) (a * T1 + RF_-1) + +// research shapes +// XXX would be nice to have the option of temporally-varying research sizes +#if R == 0 || R == 1 +#define FOR_RESEARCH(r) S_1X1(r) +const int r_area = R_AREA(1); +#elif RS == 7 +#define FOR_RESEARCH(r) S_PLUS(r,hr,RINCR(r,y)) +const int r_area = R_AREA(S_PLUS_A(hr,R)); +#elif RS == 6 +#define FOR_RESEARCH(r) S_SQUARE_EVEN(r,hr,RINCR(r,y)) +const int r_area = R_AREA(R*R); +#elif RS == 5 +#define FOR_RESEARCH(r) S_TRUNC_TRIANGLE(r,hr,RINCR(r,x)) +const int r_area = R_AREA(S_TRIANGLE_A(hr,hr)); +#elif RS == 4 +#define FOR_RESEARCH(r) S_TRIANGLE(r,hr,RINCR(r,x)) +const int r_area = R_AREA(S_TRIANGLE_A(hr,R)); +#elif RS == 3 +#define FOR_RESEARCH(r) S_DIAMOND(r,hr,RINCR(r,y)) +const int r_area = R_AREA(S_DIAMOND_A(hr,R)); +#elif RS == 2 +#define FOR_RESEARCH(r) S_VERTICAL(r,hr,RINCR(r,y)) +const int r_area = R_AREA(R); +#elif RS == 1 +#define FOR_RESEARCH(r) S_HORIZONTAL(r,hr,RINCR(r,x)) +const int r_area = R_AREA(R); +#elif RS == 0 +#define FOR_RESEARCH(r) S_SQUARE(r,hr,RINCR(r,y)) +const int r_area = R_AREA(R*R); +#endif + +#define RI1 (RI+1) +#define RFI1 (RFI+1) + +#if RI +#define FOR_ROTATION for (float ri = 0; ri < 360; ri+=360.0/RI1) +#else +#define FOR_ROTATION +#endif + +#if RFI +#define FOR_REFLECTION for (int rfi = 0; rfi < RFI1; rfi++) +#else +#define FOR_REFLECTION +#endif + +#if PD +#define PINCR DINCR +#else +#define PINCR(z,c) (z.c++) +#endif + +#define P_AREA(a) (a - PD) + +// patch shapes +#if P == 0 || P == 1 +#define FOR_PATCH(p) S_1X1(p) +const int p_area = P_AREA(1); +#elif PS == 7 +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) 
+const int p_area = P_AREA(S_PLUS_A(hp,P)); +#elif PS == 6 +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) +const int p_area = P_AREA(P*P); +#elif PS == 5 +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) +const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); +#elif PS == 4 +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) +const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); +#elif PS == 3 +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) +const int p_area = P_AREA(S_DIAMOND_A(hp,P)); +#elif PS == 2 +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) +const int p_area = P_AREA(P); +#elif PS == 1 +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) +const int p_area = P_AREA(P); +#elif PS == 0 +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) +const int p_area = P_AREA(P*P); +#endif + +const float r_scale = 1.0/r_area; +const float p_scale = 1.0/p_area; + +#define sample(tex, pos, size, pt, off) tex(pos + pt * (vec2(off) + 0.5 - fract(pos*size))) +#define load_(off) sample(HOOKED_tex, HOOKED_pos, HOOKED_size, HOOKED_pt, off) + +#if RF_ && defined(LUMA_raw) +#define load2_(off) sample(RF_LUMA_tex, RF_LUMA_pos, RF_LUMA_size, RF_LUMA_pt, off) +#define gather_offs(off, off_arr) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, off_arr))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off) * RF_LUMA_pt, 0) +#elif RF_ && D1W +#define load2_(off) sample(RF_tex, RF_pos, RF_size, RF_pt, off) +#define gather_offs(off, off_arr) (RF_mul * vec4(textureGatherOffsets(RF_raw, RF_pos + vec2(off) * RF_pt, off_arr))) +#define gather(off) RF_gather(RF_pos + (off) * RF_pt, 0) +#elif RF_ +#define load2_(off) sample(RF_tex, RF_pos, RF_size, RF_pt, off) +#else +#define load2_(off) load_(off) +#define gather_offs(off, off_arr) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, off_arr))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) +#endif + +#if T +val load(vec3 off) +{ + switch 
(min(int(off.z), frame)) { + case 0: return val_swizz(load_(off)); + + } +} +val load2(vec3 off) +{ + return off.z == 0 ? val_swizz(load2_(off)) : load(off); +} +#else +#define load(off) val_swizz(load_(off)) +#define load2(off) val_swizz(load2_(off)) +#endif + +val poi = load(vec3(0)); // pixel-of-interest +val poi2 = load2(vec3(0)); // guide pixel-of-interest + +#if RI // rotation: rot() turns the patch offset p by d degrees about the origin +vec2 rot(vec2 p, float d) +{ + return vec2( + p.x * cos(radians(d)) - p.y * sin(radians(d)), // x' = x*cos - y*sin + p.x * sin(radians(d)) + p.y * cos(radians(d)) // y' = x*sin + y*cos, so d=180 maps (x,y) to (-x,-y) like the gather path's swizzles + ); +} +#else +#define rot(p, d) (p) +#endif + +#if RFI // reflection +vec2 ref(vec2 p, int d) +{ + switch (d) { + case 0: return p; + case 1: return p * vec2(1, -1); + case 2: return p * vec2(-1, 1); + } +} +#else +#define ref(p, d) (p) +#endif + +#if SST && R >= SST +float spatial_r(vec3 v) +{ + v.xy += 0.5 - fract(HOOKED_pos*HOOKED_size); + return SK(length(v*SD)*SS); +} +#else +#define spatial_r(v) (1) +#endif + +#if PST && P >= PST +#define spatial_p(v) PSK(length(v*PSD)*PSS) +#else +#define spatial_p(v) (1) +#endif + +val range(val pdiff_sq) +{ + const float h = S*0.013; + const float pdiff_scale = 1.0/(h*h); + pdiff_sq = sqrt(pdiff_sq * pdiff_scale); +#if defined(LUMA_raw) + return RK(pdiff_sq); +#elif defined(CHROMA_raw) + return vec2(RK(pdiff_sq.x), RK(pdiff_sq.y)); +#else + return vec3(RK(pdiff_sq.x), RK(pdiff_sq.y), RK(pdiff_sq.z)); +#endif + //return exp(-pdiff_sq * pdiff_scale); + + // weight function from the NLM paper, it's not very good + //return exp(-max(pdiff_sq - 2*S*S, 0.0) * pdiff_scale); +} + +val patch_comparison(vec3 r, vec3 r2) +{ + vec3 p; + val min_rot = val(p_area); + + FOR_ROTATION FOR_REFLECTION { + val pdiff_sq = val(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + val diff_sq = load2(p + r2) - load2((transformed_p + r) * SF); + diff_sq *= diff_sq; + diff_sq = 1 - (1 - diff_sq) * spatial_p(p.xy); + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); + } + + return min_rot * 
p_scale; +} + +#define NO_GATHER (PD == 0 && NG == 0) // never textureGather if any of these conditions are false +#define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) + +#if (defined(LUMA_gather) || D1W) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && REGULAR_ROTATIONS && NO_GATHER +// 3x3 diamond/plus patch_comparison_gather +// XXX extend to support arbitrary sizes (probably requires code generation) +// XXX extend to support 3x3 square +// XXX support PSS +const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; +const ivec2 offsets_sf[4] = { ivec2(0,-1) * SF, ivec2(-1,0) * SF, ivec2(0,1) * SF, ivec2(1,0) * SF }; +vec4 poi_patch = gather_offs(0, offsets); +float patch_comparison_gather(vec3 r, vec3 r2) +{ + float min_rot = p_area - 1; + vec4 transformer = gather_offs(r, offsets_sf); + FOR_ROTATION { + FOR_REFLECTION { + float diff_sq = dot((poi_patch - transformer) * (poi_patch - transformer), vec4(1)); + min_rot = min(diff_sq, min_rot); +#if RFI + switch(rfi) { + case 0: transformer = transformer.zyxw; break; + case 1: transformer = transformer.zwxy; break; // undoes last mirror, performs another mirror + case 2: transformer = transformer.zyxw; break; // undoes last mirror + } +#endif + } +#if RI == 3 + transformer = transformer.wxyz; +#elif RI == 1 + transformer = transformer.zwxy; +#endif + } + float center_diff_sq = poi2.x - load2(r).x; + center_diff_sq *= center_diff_sq; + return (min_rot + center_diff_sq) * p_scale; +} +#elif (defined(LUMA_gather) || D1W) && PS == 6 && RI == 0 && RFI == 0 && NO_GATHER +// tiled even square patch_comparison_gather +// XXX extend to support odd square? 
+float patch_comparison_gather(vec3 r, vec3 r2) +{ + vec2 tile; + float min_rot = p_area; + + /* gather order: + * w z + * x y + */ + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { + vec4 diff_sq = gather(tile + r.xy) - gather(tile + r2.xy); + diff_sq *= diff_sq; + diff_sq = 1 - (1 - diff_sq) * vec4(spatial_p(tile+vec2(0,1)), spatial_p(tile+vec2(1,1)), + spatial_p(tile+vec2(1,0)), spatial_p(tile+vec2(0,0))); + pdiff_sq += dot(diff_sq, vec4(1)); + } + min_rot = min(min_rot, pdiff_sq); + + return min_rot * p_scale; +} +#else +#define patch_comparison_gather patch_comparison +#endif + +vec4 hook() +{ + val total_weight = val(0); + val sum = val(0); + val result = val(0); + + vec3 r = vec3(0); + vec3 p = vec3(0); + vec3 me = vec3(0); + +#if T && ME == 1 // temporal & motion estimation + vec3 me_tmp = vec3(0); + float maxweight = 0; +#elif T && ME == 2 // temporal & motion estimation + vec3 me_sum = vec3(0); + float me_weight = 0; +#endif + +#if WD == 2 // weight discard + int r_index = 0; + val_packed all_weights[r_area]; + val_packed all_pixels[r_area]; +#elif WD == 1 // weight discard + val no_weights = val(0); + val discard_total_weight = val(0); + val discard_sum = val(0); +#endif + + FOR_FRAME(r) { + // XXX ME is always a frame behind, should have to option to re-research after applying ME (could do it an arbitrary number of times per frame if desired) +#if T && ME == 1 // temporal & motion estimation max weight + if (r.z > 0) { + me += me_tmp * MEF; + me_tmp = vec3(0); + maxweight = 0; + } +#elif T && ME == 2 // temporal & motion estimation weighted average + if (r.z > 0) { + me += round(me_sum / me_weight * MEF); + me_sum = vec3(0); + me_weight = 0; + } +#endif + FOR_RESEARCH(r) { // main NLM logic +#if SKIP_PATCH + val weight = val(1); +#else + val pdiff_sq = (r.z == 0) ? 
val(patch_comparison_gather(r+me, vec3(0))) : patch_comparison(r+me, vec3(0)); + val weight = range(pdiff_sq); +#endif + +#if T && ME == 1 // temporal & motion estimation max weight + me_tmp = vec3(r.xy,0) * step(maxweight, weight.x) + me_tmp * (1 - step(maxweight, weight.x)); + maxweight = max(maxweight, weight.x); +#elif T && ME == 2 // temporal & motion estimation weighted average + me_sum += vec3(r.xy,0) * weight.x; + me_weight += weight.x; +#endif + +#if D1W + weight = val(weight.x); +#endif + + weight *= spatial_r(r); + +#if WD == 2 // weight discard + all_weights[r_index] = val_pack(weight); + all_pixels[r_index] = val_pack(load(r+me)); + r_index++; +#elif WD == 1 // weight discard + val wd_scale = 1.0/max(no_weights, 1); + val keeps = step(total_weight*wd_scale * WDT*exp(-wd_scale*WDP), weight); + discard_sum += load(r+me) * weight * (1 - keeps); + discard_total_weight += weight * (1 - keeps); + no_weights += keeps; +#endif + + sum += load(r+me) * weight; + total_weight += weight; + } // FOR_RESEARCH + } // FOR_FRAME + + val avg_weight = total_weight * r_scale; + val old_avg_weight = avg_weight; + +#if WD == 2 // true average + total_weight = val(0); + sum = val(0); + val no_weights = val(0); + + for (int i = 0; i < r_area; i++) { + val w = val_unpack(all_weights[i]); + val px = val_unpack(all_pixels[i]); + val keeps = step(avg_weight*WDT, w); + + w *= keeps; + sum += px * w; + total_weight += w; + no_weights += keeps; + } +#elif WD == 1 // moving cumulative average + total_weight -= discard_total_weight; + sum -= discard_sum; +#endif +#if WD // weight discard + avg_weight = total_weight / no_weights; +#endif + + total_weight += SW * spatial_r(vec3(0)); + sum += poi * SW * spatial_r(vec3(0)); + +#if V == 3 // weight map + result = val(avg_weight); +#else // mean + result = val(sum / total_weight); +#endif + + // store frames for temporal +#if T > 1 + +#endif +#if T && TRF + imageStore(PREV1, ivec2(HOOKED_pos*imageSize(PREV1)), unval(result)); +#elif T + 
imageStore(PREV1, ivec2(HOOKED_pos*imageSize(PREV1)), unval(poi2)); +#endif + +#if ASW == 0 // pre-WD weights +#define AS_weight old_avg_weight +#elif ASW == 1 // post-WD weights +#define AS_weight avg_weight +#endif + +#if ASK == 0 + val sharpening_strength = pow(AS_weight, val(ASP)); +#elif ASK == 1 + val sharpening_strength = mix( + pow(smoothstep(0.0, 1.0, AS_weight), val(ASP)), + AS_weight, ASC); + // XXX normalize the result to account for a negative ASC? +#elif ASK == 2 + val sharpening_strength = val(ASP); +#endif + +#if AS == 1 // sharpen+denoise + val sharpened = result + (poi - result) * ASF; +#elif AS == 2 // sharpen only + val sharpened = poi + (poi - result) * ASF; +#endif + +#if EP // extremes preserve + float luminance = EP_texOff(0).x; + // EPSILON is needed since pow(0,0) is undefined + float ep_weight = pow(max(min(1-luminance, luminance)*2, EPSILON), (luminance < 0.5 ? DP : BP)); + result = mix(poi, result, ep_weight); +#endif + +#if AS == 1 // sharpen+denoise + result = mix(sharpened, result, sharpening_strength); +#elif AS == 2 // sharpen only + result = mix(sharpened, poi, sharpening_strength); +#endif + +#if V == 4 // edge map + result = sharpening_strength; +#endif + +#if (V == 3 || V == 4) && defined(CHROMA_raw) // drop chroma for these visualizations + return vec4(0.5); +#endif + +#if V == 1 + result = clamp(pow(abs(poi - result), val(0.25)), 0.0, 1.0); +#elif V == 2 + result = (poi - result) * 0.5 + 0.5; +#endif + + return unval(mix(poi, result, BF)); +} + diff --git a/portable_config/shaders/nlmeans_hq.glsl b/portable_config/shaders/nlmeans_hq.glsl deleted file mode 100644 index e030354d..00000000 --- a/portable_config/shaders/nlmeans_hq.glsl +++ /dev/null @@ -1,2161 +0,0 @@ -/* vi: ft=c - * - * Based on vf_nlmeans.c from FFmpeg. 
- * - * Copyright (c) 2022 an3223 - * Copyright (c) 2016 Clément Bœsch - * - * This program is free software: you can redistribute it and/or modify it - * under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 2.1 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see . - */ - -// Profile description: Slow, but higher quality. - -/* The recommended usage of this shader and its variant profiles is to add them - * to input.conf and then dispatch the appropriate shader via a keybind during - * media playback. Here is an example input.conf entry: - * - * F4 no-osd change-list glsl-shaders toggle "~~/shaders/nlmeans_luma.glsl"; show-text "Non-local means (LUMA only)" - * - * These shaders can also be enabled by default in mpv.conf, for example: - * - * glsl-shaders='~~/shaders/nlmeans.glsl' - * - * Both of the examples above assume the shaders are located in a subdirectory - * named "shaders" within mpv's config directory. Refer to the mpv - * documentation for more details. - * - * This shader is highly configurable via user variables below. Although the - * default settings should offer good quality at a reasonable speed, you are - * encouraged to tweak them to your preferences. Be mindful that certain - * settings may greatly affect speed. - * - * Denoising is most useful for noisy content. If there is no perceptible - * noise, you probably won't see a positive difference. - * - * The default settings are generally tuned for low noise and high detail - * preservation. 
The "medium" and "heavy" profiles are tuned for higher levels - * of noise. - * - * The denoiser will not work properly if the content has been upscaled - * beforehand, whether it was done by you or someone down the line. Consider - * issuing a command to downscale in the mpv console, like so: - * - * vf toggle scale=-2:720 - * - * ...replacing 720 with whatever resolution seems appropriate. Rerun the - * command to undo the downscale. It may take some trial-and-error to find the - * proper resolution. - */ - -/* Regarding speed - * - * Speed may vary wildly for different vo and gpu-api settings. Generally - * vo=gpu-next and gpu-api=vulkan are recommended for the best speed, but this - * may be different for your system. - * - * If your GPU doesn't support textureGather, or if you are on a version of mpv - * prior to 0.35.0, then consider setting RI/RFI to 0, or try the LQ and VLQ - * profiles. - * - * textureGather is LUMA only and limited to the following configurations: - * - * - PS={3,7}:P=3:PST=0:RI={0,1,3}:RFI={0,1,2}:M!=1 - * - Default, very fast, rotations and reflections should be free - * - If this is unusually slow then try changing gpu-api and vo - * - If it's still slow, try setting RI/RFI to 0. - * - * - PS=6:RI={0,1,3}:RFI={0,1,2} - * - Currently the only scalable variant - * - Patch shape is asymmetric on two axis - * - Rotations should have very little speed impact - * - Reflections may have a significant speed impact - * - * Options which always disable textureGather: - * - PD - */ - -// The following is shader code injected from nlmeans.glsl -/* vi: ft=c - * - * Based on vf_nlmeans.c from FFmpeg. - * - * Copyright (c) 2022 an3223 - * Copyright (c) 2016 Clément Bœsch - * - * This program is free software: you can redistribute it and/or modify it - * under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 2.1 of the License, or (at - * your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see . - */ - -// Profile description: Default profile, general purpose, tuned for low noise - -/* The recommended usage of this shader and its variant profiles is to add them - * to input.conf and then dispatch the appropriate shader via a keybind during - * media playback. Here is an example input.conf entry: - * - * F4 no-osd change-list glsl-shaders toggle "~~/shaders/nlmeans_luma.glsl"; show-text "Non-local means (LUMA only)" - * - * These shaders can also be enabled by default in mpv.conf, for example: - * - * glsl-shaders='~~/shaders/nlmeans.glsl' - * - * Both of the examples above assume the shaders are located in a subdirectory - * named "shaders" within mpv's config directory. Refer to the mpv - * documentation for more details. - * - * This shader is highly configurable via user variables below. Although the - * default settings should offer good quality at a reasonable speed, you are - * encouraged to tweak them to your preferences. Be mindful that certain - * settings may greatly affect speed. - * - * Denoising is most useful for noisy content. If there is no perceptible - * noise, you probably won't see a positive difference. - * - * The default settings are generally tuned for low noise and high detail - * preservation. The "medium" and "heavy" profiles are tuned for higher levels - * of noise. - * - * The denoiser will not work properly if the content has been upscaled - * beforehand, whether it was done by you or someone down the line. 
Consider - * issuing a command to downscale in the mpv console, like so: - * - * vf toggle scale=-2:720 - * - * ...replacing 720 with whatever resolution seems appropriate. Rerun the - * command to undo the downscale. It may take some trial-and-error to find the - * proper resolution. - */ - -/* Regarding speed - * - * Speed may vary wildly for different vo and gpu-api settings. Generally - * vo=gpu-next and gpu-api=vulkan are recommended for the best speed, but this - * may be different for your system. - * - * If your GPU doesn't support textureGather, or if you are on a version of mpv - * prior to 0.35.0, then consider setting RI/RFI to 0, or try the LQ and VLQ - * profiles. - * - * textureGather is LUMA only and limited to the following configurations: - * - * - PS={3,7}:P=3:PST=0:RI={0,1,3}:RFI={0,1,2}:M!=1 - * - Default, very fast, rotations and reflections should be free - * - If this is unusually slow then try changing gpu-api and vo - * - If it's still slow, try setting RI/RFI to 0. - * - * - PS=6:RI={0,1,3}:RFI={0,1,2} - * - Currently the only scalable variant - * - Patch shape is asymmetric on two axis - * - Rotations should have very little speed impact - * - Reflections may have a significant speed impact - * - * Options which always disable textureGather: - * - PD - */ - -// The following is shader code injected from guided.glsl -/* vi: ft=c - * - * Copyright (c) 2022 an3223 - * - * This program is free software: you can redistribute it and/or modify it - * under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 2.1 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License - * for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see . - */ - -//desc: Guided filter guided by the downscaled image - -/* The radius can be adjusted with the MEANI stage's downscaling factor. - * Higher numbers give a bigger radius. - * - * The E variable can be found in the A stage. - * - * The subsampling (fast guided filter) can be adjusted with the I stage's - * downscaling factor. Higher numbers are faster. - * - * The guide's subsampling can be adjusted with the PREI stage's downscaling - * factor. Higher numbers downscale more. - */ - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Guided filter (PREI) -//!BIND HOOKED -//!WIDTH HOOKED.w 1.25 / -//!HEIGHT HOOKED.h 1.25 / -//!SAVE _INJ__INJ_PREI - -vec4 hook() -{ - return HOOKED_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Guided filter (I) -//!BIND _INJ__INJ_PREI -//!WIDTH HOOKED.w 1.0 / -//!HEIGHT HOOKED.h 1.0 / -//!SAVE _INJ__INJ_I - -vec4 hook() -{ -return _INJ__INJ_PREI_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Guided filter (P) -//!BIND HOOKED -//!WIDTH _INJ__INJ_I.w -//!HEIGHT _INJ__INJ_I.h -//!SAVE _INJ__INJ_P - -vec4 hook() -{ - return HOOKED_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Guided filter (MEANI) -//!BIND _INJ__INJ_I -//!WIDTH _INJ__INJ_I.w 1.5 / -//!HEIGHT _INJ__INJ_I.h 1.5 / -//!SAVE _INJ__INJ_MEANI - -vec4 hook() -{ -return _INJ__INJ_I_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Guided filter (MEANP) -//!BIND _INJ__INJ_P -//!WIDTH _INJ__INJ_MEANI.w -//!HEIGHT _INJ__INJ_MEANI.h -//!SAVE _INJ__INJ_MEANP - -vec4 hook() -{ -return _INJ__INJ_P_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Guided filter (_INJ__INJ_I_SQ) -//!BIND _INJ__INJ_I -//!WIDTH _INJ__INJ_I.w -//!HEIGHT _INJ__INJ_I.h -//!SAVE _INJ__INJ_I_SQ - -vec4 hook() -{ -return _INJ__INJ_I_texOff(0) * _INJ__INJ_I_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Guided filter (_INJ__INJ_IXP) -//!BIND _INJ__INJ_I 
-//!BIND _INJ__INJ_P -//!WIDTH _INJ__INJ_I.w -//!HEIGHT _INJ__INJ_I.h -//!SAVE _INJ__INJ_IXP - -vec4 hook() -{ -return _INJ__INJ_I_texOff(0) * _INJ__INJ_P_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Guided filter (CORRI) -//!BIND _INJ__INJ_I_SQ -//!WIDTH _INJ__INJ_MEANI.w -//!HEIGHT _INJ__INJ_MEANI.h -//!SAVE _INJ__INJ_CORRI - -vec4 hook() -{ -return _INJ__INJ_I_SQ_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Guided filter (CORRP) -//!BIND _INJ__INJ_IXP -//!WIDTH _INJ__INJ_MEANI.w -//!HEIGHT _INJ__INJ_MEANI.h -//!SAVE _INJ__INJ_CORRP - -vec4 hook() -{ -return _INJ__INJ_IXP_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Guided filter (A) -//!BIND _INJ__INJ_MEANI -//!BIND _INJ__INJ_MEANP -//!BIND _INJ__INJ_CORRI -//!BIND _INJ__INJ_CORRP -//!WIDTH _INJ__INJ_I.w -//!HEIGHT _INJ__INJ_I.h -//!SAVE _INJ__INJ_A - -#define E 0.0013 - -vec4 hook() -{ -vec4 var = _INJ__INJ_CORRI_texOff(0) - _INJ__INJ_MEANI_texOff(0) * _INJ__INJ_MEANI_texOff(0); -vec4 cov = _INJ__INJ_CORRP_texOff(0) - _INJ__INJ_MEANI_texOff(0) * _INJ__INJ_MEANP_texOff(0); - return cov / (var + E); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Guided filter (B) -//!BIND _INJ__INJ_A -//!BIND _INJ__INJ_MEANI -//!BIND _INJ__INJ_MEANP -//!WIDTH _INJ__INJ_I.w -//!HEIGHT _INJ__INJ_I.h -//!SAVE _INJ__INJ_B - -vec4 hook() -{ -return _INJ__INJ_MEANP_texOff(0) - _INJ__INJ_A_texOff(0) * _INJ__INJ_MEANI_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Guided filter (MEANA) -//!BIND _INJ__INJ_A -//!WIDTH _INJ__INJ_MEANI.w -//!HEIGHT _INJ__INJ_MEANI.h -//!SAVE _INJ__INJ_MEANA - -vec4 hook() -{ -return _INJ__INJ_A_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Guided filter (MEANB) -//!BIND _INJ__INJ_B -//!WIDTH _INJ__INJ_MEANI.w -//!HEIGHT _INJ__INJ_MEANI.h -//!SAVE _INJ__INJ_MEANB - -vec4 hook() -{ -return _INJ__INJ_B_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Guided filter -//!BIND HOOKED -//!BIND _INJ__INJ_MEANA -//!BIND _INJ__INJ_MEANB -//!SAVE _INJ_RF_LUMA - -vec4 
hook() -{ -return _INJ__INJ_MEANA_texOff(0) * HOOKED_texOff(0) + _INJ__INJ_MEANB_texOff(0); -} - -// End of source code injected from guided.glsl -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Non-local means (downscale) -//!WIDTH LUMA.w 3 / -//!HEIGHT LUMA.h 3 / -//!BIND LUMA -//!SAVE _INJ_EP - -vec4 hook() -{ - return LUMA_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Non-local means (share) -//!BIND _INJ_RF_LUMA -//!SAVE _INJ_RF - -vec4 hook() -{ -return _INJ_RF_LUMA_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!BIND HOOKED -//!BIND _INJ_RF_LUMA -//!BIND _INJ_EP -//!BIND _INJ_RF -//!DESC Non-local means (nlmeans.glsl) -//!SAVE RF_LUMA - -/* User variables - * - * It is usually preferable to denoise chroma and luma differently, so the user - * variables for luma and chroma are split. - */ - -/* S = denoising factor - * P = patch size - * R = research size - * - * The denoising factor controls the level of blur, higher is blurrier. - * - * Patch size should usually be an odd number greater than or equal to 3. - * Higher values are slower and not always better. - * - * Research size usually be an odd number greater than or equal to 3. Higher - * values are usually better, but slower and offer diminishing returns. - * - * Even-numbered patch/research sizes will sample between pixels unless PS=6. - * It's not known whether this is ever useful behavior or not. This is - * incompatible with textureGather optimizations, so NG=1 to disable them. - */ -#ifdef LUMA_raw -#define S 2.0 -#define P 3 -#define R 5 -#else -#define S 5.0 -#define P 3 -#define R 5 -#endif - -/* Adaptive sharpening - * - * Uses the blur incurred by denoising to perform an unsharp mask, and uses the - * weight map to restrict the sharpening to edges. - * - * Use M=4 to get a good look at which areas are/aren't sharpened. 
- * - * AS: 2 for sharpening, 1 for sharpening+denoising, 0 to disable - * ASF: Sharpening factor, higher numbers make a sharper underlying image - * ASP: Weight power, higher numbers use more of the sharp image - * ASW: - * - 0 to use pre-WD weights - * - 1 to use post-WD weights (ASP should be ~2x to compensate) - * ASK: Weight kernel: - * - 0 for power. This is the old method. - * - 1 for sigmoid. This is generally recommended. - * - 2 for constant (non-adaptive, w/ ASP=0 this sharpens the entire image) - * ASC (only for ASK=1, range 0-1): Reduces the contrast of the edge map - */ -#ifdef LUMA_raw -#define AS 0 -#define ASF 2.0 -#define ASP 4.0 -#define ASW 0 -#define ASK 1 -#define ASC 0.0 -#else -#define AS 0 -#define ASF 2.0 -#define ASP 4.0 -#define ASW 0 -#define ASK 1 -#define ASC 0.0 -#endif - -/* Starting weight - * - * Lower numbers give less weight to the pixel-of-interest, which may help - * handle higher noise levels, ringing, and may be useful for other things too? - * - * EPSILON should be used instead of zero to avoid divide-by-zero errors. The - * avg_weight/old_avg_weight variables may be used to make SW adapt to the - * local noise level, e.g., SW=max(avg_weight, EPSILON) - */ -#ifdef LUMA_raw -#define SW 1.0 -#else -#define SW 0.5 -#endif - -/* Weight discard - * - * Discard weights that fall below a fraction of the average weight. This culls - * the most dissimilar samples from the blur, yielding a much more pleasant - * result, especially around edges. - * - * WD: - * - 2: True average. Very good quality, but slower and uses more memory. - * - 1: Moving cumulative average. Inaccurate, tends to blur directionally. 
- * - 0: Disable - * - * WDT: Threshold coefficient, higher numbers discard more - * WDP (only for WD=1): Increasing reduces the threshold for small sample sizes - */ -#ifdef LUMA_raw -#define WD 2 -#define WDT 0.5 -#define WDP 6.0 -#else -#define WD 2 -#define WDT 0.75 -#define WDP 6.0 -#endif - -/* Extremes preserve - * - * Reduces denoising around very bright/dark areas. The downscaling factor of - * EP (located near the top of this shader) controls the area sampled for - * luminance (higher numbers consider more area). - * - * This is incompatible with RGB. If you have RGB hooks enabled then you will - * have to delete the EP shader stage or specify EP=0 through nlmeans_cfg. - * - * EP: 1 to enable, 0 to disable - * DP: EP strength on dark patches, 0 to fully denoise - * BP: EP strength on bright patches, 0 to fully denoise - */ -#ifdef LUMA_raw -#define EP 1 -#define BP 0.75 -#define DP 0.25 -#else -#define EP 0 -#define BP 0.0 -#define DP 0.0 -#endif - -/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ -/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ -/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ -/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ -/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ - -/* Robust filtering - * - * This setting is dependent on code generation from nlmeans_cfg, so this - * setting can only be enabled via nlmeans_cfg. - * - * Compares the pixel-of-interest against a guide, which could be a downscaled - * image or the output of another shader such as guided.glsl - */ -#ifdef LUMA_raw -#define RF 1 -#else -#define RF 1 -#endif - -/* Search shape - * - * Determines the shape of patches and research zones. Different shapes have - * different speed and quality characteristics. Every shape (besides square) is - * smaller than square. 
- * - * PS applies applies to patches, RS applies to research zones. - * - * Be wary of gather optimizations (see the Regarding Speed comment at the top) - * - * 0: square (symmetrical) - * 1: horizontal line (asymmetric) - * 2: vertical line (asymmetric) - * 3: diamond (symmetrical) - * 4: triangle (asymmetric, pointing upward) - * 5: truncated triangle (asymmetric on two axis, last row halved) - * 6: even sized square (asymmetric on two axis) - * 7: plus (symmetrical) - */ -#ifdef LUMA_raw -#define RS 3 -#define PS 3 -#else -#define RS 3 -#define PS 3 -#endif - -/* Rotational/reflectional invariance - * - * Number of rotations/reflections to try for each patch comparison. Slow, but - * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. The most similar rotation/reflection will be used. - * - * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a - * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. - * - * RI: Rotational invariance - * RFI (0 to 2): Reflectional invariance - */ -#ifdef LUMA_raw -#define RI 3 -#define RFI 2 -#else -#define RI 0 -#define RFI 0 -#endif - -/* Temporal denoising - * - * Caveats: - * - Slower, each frame needs to be researched - * - Requires vo=gpu-next and nlmeans_temporal.glsl - * - Luma-only (this is a bug) - * - Buggy - * - * Gather samples across multiple frames. May cause motion blur and may - * struggle more with noise that persists across multiple frames (e.g., from - * compression or duplicate frames), but can work very well on high quality - * video. - * - * Motion estimation (ME) should improve quality without impacting speed. 
- * - * T: number of frames used - * ME: motion estimation, 0 for none, 1 for max weight, 2 for weighted avg - */ -#ifdef LUMA_raw -#define T 0 -#define ME 1 -#else -#define T 0 -#define ME 0 -#endif - -/* Spatial kernel - * - * Increasing the spatial denoising factor (SS) reduces the weight of further - * pixels. - * - * Spatial distortion instructs the spatial kernel to view that axis as - * closer/further, for instance SD=(1,1,0.5) would make the temporal axis - * appear closer and increase blur between frames. - * - * The intra-patch variants do not yet have well-understood effects. They are - * intended to make large patch sizes more useful. Likely slower. - * - * SS: spatial denoising factor - * SD: spatial distortion (X, Y, time) - * PSS: intra-patch spatial denoising factor - * PST: enables intra-patch spatial kernel if P>=PST, 0 fully disables - * PSD: intra-patch spatial distortion (X, Y) - */ -#ifdef LUMA_raw -#define SS 0.25 -#define SD vec3(1,1,1.5) -#define PST 0 -#define PSS 0.0 -#define PSD vec2(1,1) -#else -#define SS 0.25 -#define SD vec3(1,1,1.5) -#define PST 0 -#define PSS 0.0 -#define PSD vec2(1,1) -#endif - -// Scaling factor (should match WIDTH/HEIGHT) -#ifdef LUMA_raw -#define SF 1 -#else -#define SF 1 -#endif - -/* Estimator - * - * 0: means - * 1: Euclidean medians (extremely slow, may be good for heavy noise) - * 2: weight map (not a denoiser, maybe useful for generating image masks) - * 3: weighted median intensity (slow, may be good for heavy noise) - * 4: edge map (based on the relevant AS settings) - */ -#ifdef LUMA_raw -#define M 0 -#else -#define M 0 -#endif - -/* Difference visualization - * - * Visualizes the difference between input/output image - * - * 0: off - * 1: absolute difference scaled by S - * 2: difference centered on 0.5 - */ -#ifdef LUMA_raw -#define DV 0 -#else -#define DV 0 -#endif - -/* Blur factor - * - * 0 to 1, only useful for alternative estimators. 
You're probably looking for - * "S" (denoising factor), go back to the top of the shader! - */ -#ifdef LUMA_raw -#define BF 1.0 -#else -#define BF 1.0 -#endif - -// Force disable textureGather -#ifdef LUMA_raw -#define NG 0 -#else -#define NG 0 -#endif - -// Patch donut (probably useless) -#ifdef LUMA_raw -#define PD 0 -#else -#define PD 0 -#endif - -// Duplicate 1st weight (for LGC) -#ifdef LUMA_raw -#define D1W 0 -#else -#define D1W 0 -#endif - -/* Shader code */ - -#define EPSILON 0.00000000001 -#define M_PI 3.14159265358979323846 - -#if PS == 6 -const int hp = P/2; -#else -const float hp = int(P/2) - 0.5*(1-(P%2)); // sample between pixels for even patch sizes -#endif - -#if RS == 6 -const int hr = R/2; -#else -const float hr = int(R/2) - 0.5*(1-(R%2)); // sample between pixels for even research sizes -#endif - -// donut increment, increments without landing on (0,0,0) -// much faster than a "continue" statement -#define DINCR(z,c) (z.c++,(z.c += int(z == vec3(0)))) - -// search shapes and their corresponding areas -#define S_1X1(z) for (z = vec3(0); z.x <= 0; z.x++) - -#define S_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz); incr) -#define S_TRUNC_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz)*int(z.y!=0); incr) -#define S_TRIANGLE_A(hz,Z) int(hz*hz+Z) - -#define S_DIAMOND(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -abs(abs(z.x) - hz); z.y <= abs(abs(z.x) - hz); incr) -#define S_DIAMOND_A(hz,Z) int(hz*hz*2+Z) - -#define S_VERTICAL(z,hz,incr) for (z.x = 0; z.x <= 0; z.x++) for (z.y = -hz; z.y <= hz; incr) -#define S_HORIZONTAL(z,hz,incr) for (z.x = -hz; z.x <= hz; incr) for (z.y = 0; z.y <= 0; z.y++) - -#define S_PLUS(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz * int(z.x == 0); z.y <= hz * int(z.x == 0); incr) -#define S_PLUS_A(hz,Z) (Z*2 - 1) - -#define S_SQUARE(z,hz,incr) for (z.x = -hz; z.x <= hz; 
z.x++) for (z.y = -hz; z.y <= hz; incr) -#define S_SQUARE_EVEN(z,hz,incr) for (z.x = -hz; z.x < hz; z.x++) for (z.y = -hz; z.y < hz; incr) - -#define T1 (T+1) -#define FOR_FRAME(r) for (r.z = 0; r.z < T1; r.z++) - -// Skip comparing the pixel-of-interest against itself, unless RF is enabled -#if RF -#define RINCR(z,c) (z.c++) -#else -#define RINCR DINCR -#endif - -#define R_AREA(a) (a * T1 + RF-1) - -// research shapes -// XXX would be nice to have the option of temporally-varying research sizes -#if R == 0 || R == 1 -#define FOR_RESEARCH(r) S_1X1(r) -const int r_area = R_AREA(1); -#elif RS == 7 -#define FOR_RESEARCH(r) S_PLUS(r,hr,RINCR(r,y)) -const int r_area = R_AREA(S_PLUS_A(hr,R)); -#elif RS == 6 -#define FOR_RESEARCH(r) S_SQUARE_EVEN(r,hr,RINCR(r,y)) -const int r_area = R_AREA(R*R); -#elif RS == 5 -#define FOR_RESEARCH(r) S_TRUNC_TRIANGLE(r,hr,RINCR(r,x)) -const int r_area = R_AREA(S_TRIANGLE_A(hr,hr)); -#elif RS == 4 -#define FOR_RESEARCH(r) S_TRIANGLE(r,hr,RINCR(r,x)) -const int r_area = R_AREA(S_TRIANGLE_A(hr,R)); -#elif RS == 3 -#define FOR_RESEARCH(r) S_DIAMOND(r,hr,RINCR(r,y)) -const int r_area = R_AREA(S_DIAMOND_A(hr,R)); -#elif RS == 2 -#define FOR_RESEARCH(r) S_VERTICAL(r,hr,RINCR(r,y)) -const int r_area = R_AREA(R); -#elif RS == 1 -#define FOR_RESEARCH(r) S_HORIZONTAL(r,hr,RINCR(r,x)) -const int r_area = R_AREA(R); -#elif RS == 0 -#define FOR_RESEARCH(r) S_SQUARE(r,hr,RINCR(r,y)) -const int r_area = R_AREA(R*R); -#endif - -#define RI1 (RI+1) -#define RFI1 (RFI+1) - -#if RI -#define FOR_ROTATION for (float ri = 0; ri < 360; ri+=360.0/RI1) -#else -#define FOR_ROTATION -#endif - -#if RFI -#define FOR_REFLECTION for (int rfi = 0; rfi < RFI1; rfi++) -#else -#define FOR_REFLECTION -#endif - -#if PD -#define PINCR DINCR -#else -#define PINCR(z,c) (z.c++) -#endif - -#define P_AREA(a) (a - PD) - -// patch shapes -#if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) -const int p_area = P_AREA(1); -#elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) 
-const int p_area = P_AREA(S_PLUS_A(hp,P)); -#elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) -const int p_area = P_AREA(P*P); -#elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) -const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); -#elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) -const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); -#elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) -const int p_area = P_AREA(S_DIAMOND_A(hp,P)); -#elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) -const int p_area = P_AREA(P); -#elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) -const int p_area = P_AREA(P); -#elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) -const int p_area = P_AREA(P*P); -#endif - -const float r_scale = 1.0/r_area; -const float p_scale = 1.0/p_area; - -#define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) - -#if RF && defined(LUMA_raw) -#define load2_(off) _INJ_RF_LUMA_tex(_INJ_RF_LUMA_pos + _INJ_RF_LUMA_pt * vec2(off)) -#define gather_offs(off, off_arr) (_INJ_RF_LUMA_mul * vec4(textureGatherOffsets(_INJ_RF_LUMA_raw, _INJ_RF_LUMA_pos + vec2(off) * _INJ_RF_LUMA_pt, off_arr))) -#define gather(off) _INJ_RF_LUMA_gather(_INJ_RF_LUMA_pos + (off) * _INJ_RF_LUMA_pt, 0) -#elif RF && D1W -#define load2_(off) _INJ_RF_tex(_INJ_RF_pos + _INJ_RF_pt * vec2(off)) -#define gather_offs(off, off_arr) (_INJ_RF_mul * vec4(textureGatherOffsets(_INJ_RF_raw, _INJ_RF_pos + vec2(off) * _INJ_RF_pt, off_arr))) -#define gather(off) _INJ_RF_gather(_INJ_RF_pos + (off) * _INJ_RF_pt, 0) -#elif RF -#define load2_(off) _INJ_RF_tex(_INJ_RF_pos + _INJ_RF_pt * vec2(off)) -#else -#define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) -#define gather_offs(off, off_arr) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, off_arr))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) -#endif - -#if T -vec4 load(vec3 off) -{ - switch 
(int(off.z)) { - case 0: return load_(off); - } -} -vec4 load2(vec3 off) -{ - switch (int(off.z)) { - case 0: return load2_(off); - } -} -#else -#define load(off) load_(off) -#define load2(off) load2_(off) -#endif - -vec4 poi = load(vec3(0)); // pixel-of-interest -vec4 poi2 = load2(vec3(0)); // guide pixel-of-interest - -#if RI // rotation -vec2 rot(vec2 p, float d) -{ - return vec2( - p.x * cos(radians(d)) - p.y * sin(radians(d)), - p.y * sin(radians(d)) + p.x * cos(radians(d)) - ); -} -#else -#define rot(p, d) (p) -#endif - -#if RFI // reflection -vec2 ref(vec2 p, int d) -{ - switch (d) { - case 0: return p; - case 1: return p * vec2(1, -1); - case 2: return p * vec2(-1, 1); - } -} -#else -#define ref(p, d) (p) -#endif - -vec4 patch_comparison(vec3 r, vec3 r2) -{ - vec3 p; - vec4 min_rot = vec4(p_area); - - FOR_ROTATION FOR_REFLECTION { - vec4 pdiff_sq = vec4(0); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = load2(p + r2) - load2((transformed_p + r) * SF); - diff_sq *= diff_sq; -#if PST && P >= PST - float pdist = length(p.xy*PSD)*PSS; - pdist = exp(-(pdist*pdist)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); -#endif - pdiff_sq += diff_sq; - } - min_rot = min(min_rot, pdiff_sq); - } - - return min_rot * p_scale; -} - -#define NO_GATHER (PD == 0 && NG == 0) // never textureGather if any of these conditions are false -#define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) - -#if (defined(LUMA_gather) || D1W) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER -// 3x3 diamond/plus patch_comparison_gather -// XXX extend to support arbitrary sizes (probably requires code generation) -// XXX extend to support 3x3 square -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; -const ivec2 offsets_sf[4] = { ivec2(0,-1) * SF, ivec2(-1,0) * SF, ivec2(0,1) * SF, ivec2(1,0) * SF }; -vec4 poi_patch = gather_offs(0, offsets); -vec4 
patch_comparison_gather(vec3 r, vec3 r2) -{ - float min_rot = p_area - 1; - vec4 transformer = gather_offs(r, offsets_sf); - FOR_ROTATION { - FOR_REFLECTION { - float diff_sq = dot((poi_patch - transformer) * (poi_patch - transformer), vec4(1)); - min_rot = min(diff_sq, min_rot); -#if RFI - switch(rfi) { - case 0: transformer = transformer.zyxw; break; - case 1: transformer = transformer.zwxy; break; // undoes last mirror, performs another mirror - case 2: transformer = transformer.zyxw; break; // undoes last mirror - } -#endif - } -#if RI == 3 - transformer = transformer.wxyz; -#elif RI == 1 - transformer = transformer.zwxy; -#endif - } - float center_diff_sq = poi2.x - load2(r).x; - center_diff_sq *= center_diff_sq; - return vec4(min_rot + center_diff_sq, 0, 0, 0) * p_scale; -} -#elif (defined(LUMA_gather) || D1W) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER -// tiled even square patch_comparison_gather -// XXX extend to support odd square? -// XXX rotations/reflections appear to be subtly broken -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - vec2 tile; - float min_rot = p_area; - - /* gather order: - * w z - * x y - */ - FOR_ROTATION FOR_REFLECTION { - float pdiff_sq = 0; - for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { - vec4 poi_patch = gather(tile + r2.xy); - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - -#if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees -#endif -#if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } -#endif - - vec4 diff_sq = (poi_patch - transformer) * (poi_patch - transformer); -#if PST && P >= PST - // XXX refactor to avoid pow (should probably break off into a function) - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - 
exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq += dot(diff_sq, vec4(1)); - } - min_rot = min(min_rot, pdiff_sq); - } - - return vec4(min_rot, 0, 0, 0) * p_scale; -} -#else -#define patch_comparison_gather patch_comparison -#endif - -vec4 hook() -{ - vec4 total_weight = vec4(0); - vec4 sum = vec4(0); - vec4 result = vec4(0); - - vec3 r = vec3(0); - vec3 p = vec3(0); - vec3 me = vec3(0); - -#if T && ME == 1 // temporal & motion estimation - vec3 me_tmp = vec3(0); - float maxweight = 0; -#elif T && ME == 2 // temporal & motion estimation - vec3 me_sum = vec3(0); - float me_weight = 0; -#endif - -#if WD == 2 || M == 3 // weight discard, weighted median intensities - int r_index = 0; - vec4 all_weights[r_area]; - vec4 all_pixels[r_area]; -#elif WD == 1 // weight discard - vec4 no_weights = vec4(0); - vec4 discard_total_weight = vec4(0); - vec4 discard_sum = vec4(0); -#endif - -#if M == 1 // Euclidean medians - vec4 minsum = vec4(0); -#endif - - FOR_FRAME(r) { - // XXX ME is always a frame behind, should have to option to re-research after applying ME (could do it an arbitrary number of times per frame if desired) -#if T && ME == 1 // temporal & motion estimation max weight - if (r.z > 0) { - me += me_tmp; - me_tmp = vec3(0); - maxweight = 0; - } -#elif T && ME == 2 // temporal & motion estimation weighted average - if (r.z > 0) { - me += round(me_sum / me_weight); - me_sum = vec3(0); - me_weight = 0; - } -#endif - FOR_RESEARCH(r) { - // main NLM logic - const float h = S*0.013; - const float pdiff_scale = 1.0/(h*h); - vec4 pdiff_sq = (r.z == 0) ? 
patch_comparison_gather(r+me, vec3(0)) : patch_comparison(r+me, vec3(0)); - vec4 weight = exp(-pdiff_sq * pdiff_scale); - -#if T && ME == 1 // temporal & motion estimation max weight - me_tmp = vec3(r.xy,0) * step(maxweight, weight.x) + me_tmp * (1 - step(maxweight, weight.x)); - maxweight = max(maxweight, weight.x); -#elif T && ME == 2 // temporal & motion estimation weighted average - me_sum += vec3(r.xy,0) * weight.x; - me_weight += weight.x; -#endif - -#if D1W - weight = vec4(weight.x); -#endif - - weight *= exp(-(length(r*SD)*SS * length(r*SD)*SS)); // spatial kernel - -#if WD == 2 || M == 3 // weight discard, weighted median intensity - all_weights[r_index] = weight; - all_pixels[r_index] = load(r+me); - r_index++; -#elif WD == 1 // weight discard - vec4 wd_scale = 1.0/max(no_weights, 1); - vec4 keeps = step(total_weight*wd_scale * WDT*exp(-wd_scale*WDP), weight); - discard_sum += load(r+me) * weight * (1 - keeps); - discard_total_weight += weight * (1 - keeps); - no_weights += keeps; -#endif - - sum += load(r+me) * weight; - total_weight += weight; - -#if M == 1 // Euclidean median - // Based on: https://arxiv.org/abs/1207.3056 - // XXX might not work with ME - vec3 r2; - vec4 wpdist_sum = vec4(0); - FOR_FRAME(r2) FOR_RESEARCH(r2) { - vec4 pdist = (r.z + r2.z) == 0 ? patch_comparison_gather(r+me, r2+me) : patch_comparison(r+me, r2+me); - wpdist_sum += sqrt(pdist) * (1-weight); - } - - vec4 newmin = step(wpdist_sum, minsum); // wpdist_sum <= minsum - newmin *= 1 - step(wpdist_sum, vec4(0)); // && wpdist_sum > 0 - newmin += step(minsum, vec4(0)); // || minsum <= 0 - newmin = min(newmin, 1); - - minsum = (newmin * wpdist_sum) + ((1-newmin) * minsum); - result = (newmin * load(r+me)) + ((1-newmin) * result); -#endif - } // FOR_RESEARCH - } // FOR_FRAME - - // XXX optionally put the denoised pixel into the frame buffer? 
-#if T // temporal -#endif - - vec4 avg_weight = total_weight * r_scale; - vec4 old_avg_weight = avg_weight; - -#if WD == 2 // true average - total_weight = vec4(0); - sum = vec4(0); - vec4 no_weights = vec4(0); - - for (int i = 0; i < r_area; i++) { - vec4 keeps = step(avg_weight*WDT, all_weights[i]); - all_weights[i] *= keeps; - sum += all_pixels[i] * all_weights[i]; - total_weight += all_weights[i]; - no_weights += keeps; - } -#elif WD == 1 // moving cumulative average - total_weight -= discard_total_weight; - sum -= discard_sum; -#endif -#if WD // weight discard - avg_weight = total_weight / no_weights; -#endif - - total_weight += SW; - sum += poi * SW; - -#if M == 3 // weighted median intensity - const float hr_area = r_area/2.0; - vec4 is_median, gt, lt, gte, lte, neq; - - for (int i = 0; i < r_area; i++) { - gt = lt = vec4(0); - for (int j = 0; j < r_area; j++) { - gte = step(all_pixels[i]*all_weights[i], all_pixels[j]*all_weights[j]); - lte = step(all_pixels[j]*all_weights[j], all_pixels[i]*all_weights[i]); - neq = 1 - gte * lte; - gt += gte * neq; - lt += lte * neq; - } - is_median = step(gt, vec4(hr_area)) * step(lt, vec4(hr_area)); - result += step(result, vec4(0)) * is_median * all_pixels[i]; - } -#elif M == 2 // weight map - result = avg_weight; -#elif M == 0 // mean - result = sum / total_weight; -#endif - -#if ASW == 0 // pre-WD weights -#define AS_weight old_avg_weight -#elif ASW == 1 // post-WD weights -#define AS_weight avg_weight -#endif - -#if ASK == 0 - vec4 sharpening_strength = pow(AS_weight, vec4(ASP)); -#elif ASK == 1 -#define sigmoid(x) (tanh(x * 2*M_PI - M_PI)*0.5+0.5) - vec4 sharpening_strength = mix(pow(sigmoid(AS_weight), vec4(ASP)), - AS_weight, ASC); - // just in case ASC < 0 (will sharpen but it's janky XXX) - sharpening_strength = clamp(sharpening_strength, 0.0, 1.0); -#elif ASK == 2 - vec4 sharpening_strength = vec4(ASP); -#endif - - // XXX maybe allow for alternative blurs? e.g., replace result w/ load2? 
-#if AS == 1 // sharpen+denoise - vec4 sharpened = result + (poi - result) * ASF; -#elif AS == 2 // sharpen only - vec4 sharpened = poi + (poi - result) * ASF; -#endif - -#if EP // extremes preserve -float luminance = _INJ_EP_texOff(0).x; - // EPSILON is needed since pow(0,0) is undefined - float ep_weight = pow(max(min(1-luminance, luminance)*2, EPSILON), (luminance < 0.5 ? DP : BP)); - result = mix(poi, result, ep_weight); -#endif - -#if AS == 1 // sharpen+denoise - result = mix(sharpened, result, sharpening_strength); -#elif AS == 2 // sharpen only - result = mix(sharpened, poi, sharpening_strength); -#endif - -#if M == 4 // edge map - result = sharpening_strength; -#endif - -#if (M == 2 || M == 4) && defined(CHROMA_raw) // drop chroma for weight maps - result = vec4(0.5); -#endif - -#if DV == 1 - result = clamp(abs(poi - result) * S, 0.0, 1.0); -#elif DV == 2 - result = (poi - result) * 0.5 + 0.5; -#endif - - return mix(poi, result, BF); -} - -// End of source code injected from nlmeans.glsl -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Non-local means (downscale) -//!WIDTH LUMA.w 3 / -//!HEIGHT LUMA.h 3 / -//!BIND LUMA -//!SAVE EP - -vec4 hook() -{ - return LUMA_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!DESC Non-local means (share) -//!BIND RF_LUMA -//!SAVE RF - -vec4 hook() -{ - return RF_LUMA_texOff(0); -} - -//!HOOK LUMA -//!HOOK CHROMA -//!BIND HOOKED -//!BIND RF_LUMA -//!BIND EP -//!BIND RF -//!DESC Non-local means (nlmeans_hq.glsl) - -/* User variables - * - * It is usually preferable to denoise chroma and luma differently, so the user - * variables for luma and chroma are split. - */ - -/* S = denoising factor - * P = patch size - * R = research size - * - * The denoising factor controls the level of blur, higher is blurrier. - * - * Patch size should usually be an odd number greater than or equal to 3. - * Higher values are slower and not always better. - * - * Research size usually be an odd number greater than or equal to 3. 
Higher - * values are usually better, but slower and offer diminishing returns. - * - * Even-numbered patch/research sizes will sample between pixels unless PS=6. - * It's not known whether this is ever useful behavior or not. This is - * incompatible with textureGather optimizations, so NG=1 to disable them. - */ -#ifdef LUMA_raw -#define S 2.25 -#define P 4 -#define R 5 -#else -#define S 5.0 -#define P 3 -#define R 5 -#endif - -/* Adaptive sharpening - * - * Uses the blur incurred by denoising to perform an unsharp mask, and uses the - * weight map to restrict the sharpening to edges. - * - * Use M=4 to get a good look at which areas are/aren't sharpened. - * - * AS: 2 for sharpening, 1 for sharpening+denoising, 0 to disable - * ASF: Sharpening factor, higher numbers make a sharper underlying image - * ASP: Weight power, higher numbers use more of the sharp image - * ASW: - * - 0 to use pre-WD weights - * - 1 to use post-WD weights (ASP should be ~2x to compensate) - * ASK: Weight kernel: - * - 0 for power. This is the old method. - * - 1 for sigmoid. This is generally recommended. - * - 2 for constant (non-adaptive, w/ ASP=0 this sharpens the entire image) - * ASC (only for ASK=1, range 0-1): Reduces the contrast of the edge map - */ -#ifdef LUMA_raw -#define AS 0 -#define ASF 2.0 -#define ASP 1 -#define ASW 0 -#define ASK 1 -#define ASC 0.0 -#else -#define AS 0 -#define ASF 2.0 -#define ASP 4.0 -#define ASW 0 -#define ASK 1 -#define ASC 0.0 -#endif - -/* Starting weight - * - * Lower numbers give less weight to the pixel-of-interest, which may help - * handle higher noise levels, ringing, and may be useful for other things too? - * - * EPSILON should be used instead of zero to avoid divide-by-zero errors. 
The - * avg_weight/old_avg_weight variables may be used to make SW adapt to the - * local noise level, e.g., SW=max(avg_weight, EPSILON) - */ -#ifdef LUMA_raw -#define SW 1.0 -#else -#define SW 0.5 -#endif - -/* Weight discard - * - * Discard weights that fall below a fraction of the average weight. This culls - * the most dissimilar samples from the blur, yielding a much more pleasant - * result, especially around edges. - * - * WD: - * - 2: True average. Very good quality, but slower and uses more memory. - * - 1: Moving cumulative average. Inaccurate, tends to blur directionally. - * - 0: Disable - * - * WDT: Threshold coefficient, higher numbers discard more - * WDP (only for WD=1): Increasing reduces the threshold for small sample sizes - */ -#ifdef LUMA_raw -#define WD 2 -#define WDT 0.5 -#define WDP 6.0 -#else -#define WD 2 -#define WDT 0.75 -#define WDP 6.0 -#endif - -/* Extremes preserve - * - * Reduces denoising around very bright/dark areas. The downscaling factor of - * EP (located near the top of this shader) controls the area sampled for - * luminance (higher numbers consider more area). - * - * This is incompatible with RGB. If you have RGB hooks enabled then you will - * have to delete the EP shader stage or specify EP=0 through nlmeans_cfg. 
- * - * EP: 1 to enable, 0 to disable - * DP: EP strength on dark patches, 0 to fully denoise - * BP: EP strength on bright patches, 0 to fully denoise - */ -#ifdef LUMA_raw -#define EP 1 -#define BP 0.75 -#define DP 0.25 -#else -#define EP 0 -#define BP 0.0 -#define DP 0.0 -#endif - -/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ -/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ -/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ -/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ -/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ - -/* Robust filtering - * - * This setting is dependent on code generation from nlmeans_cfg, so this - * setting can only be enabled via nlmeans_cfg. - * - * Compares the pixel-of-interest against a guide, which could be a downscaled - * image or the output of another shader such as guided.glsl - */ -#ifdef LUMA_raw -#define RF 1 -#else -#define RF 1 -#endif - -/* Search shape - * - * Determines the shape of patches and research zones. Different shapes have - * different speed and quality characteristics. Every shape (besides square) is - * smaller than square. - * - * PS applies applies to patches, RS applies to research zones. - * - * Be wary of gather optimizations (see the Regarding Speed comment at the top) - * - * 0: square (symmetrical) - * 1: horizontal line (asymmetric) - * 2: vertical line (asymmetric) - * 3: diamond (symmetrical) - * 4: triangle (asymmetric, pointing upward) - * 5: truncated triangle (asymmetric on two axis, last row halved) - * 6: even sized square (asymmetric on two axis) - * 7: plus (symmetrical) - */ -#ifdef LUMA_raw -#define RS 3 -#define PS 6 -#else -#define RS 3 -#define PS 3 -#endif - -/* Rotational/reflectional invariance - * - * Number of rotations/reflections to try for each patch comparison. 
Slow, but - * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. The most similar rotation/reflection will be used. - * - * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a - * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. - * - * RI: Rotational invariance - * RFI (0 to 2): Reflectional invariance - */ -#ifdef LUMA_raw -#define RI 0 -#define RFI 0 -#else -#define RI 0 -#define RFI 0 -#endif - -/* Temporal denoising - * - * Caveats: - * - Slower, each frame needs to be researched - * - Requires vo=gpu-next and nlmeans_temporal.glsl - * - Luma-only (this is a bug) - * - Buggy - * - * Gather samples across multiple frames. May cause motion blur and may - * struggle more with noise that persists across multiple frames (e.g., from - * compression or duplicate frames), but can work very well on high quality - * video. - * - * Motion estimation (ME) should improve quality without impacting speed. - * - * T: number of frames used - * ME: motion estimation, 0 for none, 1 for max weight, 2 for weighted avg - */ -#ifdef LUMA_raw -#define T 0 -#define ME 1 -#else -#define T 0 -#define ME 0 -#endif - -/* Spatial kernel - * - * Increasing the spatial denoising factor (SS) reduces the weight of further - * pixels. - * - * Spatial distortion instructs the spatial kernel to view that axis as - * closer/further, for instance SD=(1,1,0.5) would make the temporal axis - * appear closer and increase blur between frames. - * - * The intra-patch variants do not yet have well-understood effects. They are - * intended to make large patch sizes more useful. Likely slower. 
- * - * SS: spatial denoising factor - * SD: spatial distortion (X, Y, time) - * PSS: intra-patch spatial denoising factor - * PST: enables intra-patch spatial kernel if P>=PST, 0 fully disables - * PSD: intra-patch spatial distortion (X, Y) - */ -#ifdef LUMA_raw -#define SS 0.25 -#define SD vec3(1,1,1.5) -#define PST 0 -#define PSS 0.0 -#define PSD vec2(1,1) -#else -#define SS 0.25 -#define SD vec3(1,1,1.5) -#define PST 0 -#define PSS 0.0 -#define PSD vec2(1,1) -#endif - -// Scaling factor (should match WIDTH/HEIGHT) -#ifdef LUMA_raw -#define SF 1 -#else -#define SF 1 -#endif - -/* Estimator - * - * 0: means - * 1: Euclidean medians (extremely slow, may be good for heavy noise) - * 2: weight map (not a denoiser, maybe useful for generating image masks) - * 3: weighted median intensity (slow, may be good for heavy noise) - * 4: edge map (based on the relevant AS settings) - */ -#ifdef LUMA_raw -#define M 0 -#else -#define M 0 -#endif - -/* Difference visualization - * - * Visualizes the difference between input/output image - * - * 0: off - * 1: absolute difference scaled by S - * 2: difference centered on 0.5 - */ -#ifdef LUMA_raw -#define DV 0 -#else -#define DV 0 -#endif - -/* Blur factor - * - * 0 to 1, only useful for alternative estimators. You're probably looking for - * "S" (denoising factor), go back to the top of the shader! 
- */ -#ifdef LUMA_raw -#define BF 1.0 -#else -#define BF 1.0 -#endif - -// Force disable textureGather -#ifdef LUMA_raw -#define NG 0 -#else -#define NG 0 -#endif - -// Patch donut (probably useless) -#ifdef LUMA_raw -#define PD 0 -#else -#define PD 0 -#endif - -// Duplicate 1st weight (for LGC) -#ifdef LUMA_raw -#define D1W 0 -#else -#define D1W 0 -#endif - -/* Shader code */ - -#define EPSILON 0.00000000001 -#define M_PI 3.14159265358979323846 - -#if PS == 6 -const int hp = P/2; -#else -const float hp = int(P/2) - 0.5*(1-(P%2)); // sample between pixels for even patch sizes -#endif - -#if RS == 6 -const int hr = R/2; -#else -const float hr = int(R/2) - 0.5*(1-(R%2)); // sample between pixels for even research sizes -#endif - -// donut increment, increments without landing on (0,0,0) -// much faster than a "continue" statement -#define DINCR(z,c) (z.c++,(z.c += int(z == vec3(0)))) - -// search shapes and their corresponding areas -#define S_1X1(z) for (z = vec3(0); z.x <= 0; z.x++) - -#define S_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz); incr) -#define S_TRUNC_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz)*int(z.y!=0); incr) -#define S_TRIANGLE_A(hz,Z) int(hz*hz+Z) - -#define S_DIAMOND(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -abs(abs(z.x) - hz); z.y <= abs(abs(z.x) - hz); incr) -#define S_DIAMOND_A(hz,Z) int(hz*hz*2+Z) - -#define S_VERTICAL(z,hz,incr) for (z.x = 0; z.x <= 0; z.x++) for (z.y = -hz; z.y <= hz; incr) -#define S_HORIZONTAL(z,hz,incr) for (z.x = -hz; z.x <= hz; incr) for (z.y = 0; z.y <= 0; z.y++) - -#define S_PLUS(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz * int(z.x == 0); z.y <= hz * int(z.x == 0); incr) -#define S_PLUS_A(hz,Z) (Z*2 - 1) - -#define S_SQUARE(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz; z.y <= hz; incr) -#define S_SQUARE_EVEN(z,hz,incr) for (z.x = -hz; z.x < 
hz; z.x++) for (z.y = -hz; z.y < hz; incr) - -#define T1 (T+1) -#define FOR_FRAME(r) for (r.z = 0; r.z < T1; r.z++) - -// Skip comparing the pixel-of-interest against itself, unless RF is enabled -#if RF -#define RINCR(z,c) (z.c++) -#else -#define RINCR DINCR -#endif - -#define R_AREA(a) (a * T1 + RF-1) - -// research shapes -// XXX would be nice to have the option of temporally-varying research sizes -#if R == 0 || R == 1 -#define FOR_RESEARCH(r) S_1X1(r) -const int r_area = R_AREA(1); -#elif RS == 7 -#define FOR_RESEARCH(r) S_PLUS(r,hr,RINCR(r,y)) -const int r_area = R_AREA(S_PLUS_A(hr,R)); -#elif RS == 6 -#define FOR_RESEARCH(r) S_SQUARE_EVEN(r,hr,RINCR(r,y)) -const int r_area = R_AREA(R*R); -#elif RS == 5 -#define FOR_RESEARCH(r) S_TRUNC_TRIANGLE(r,hr,RINCR(r,x)) -const int r_area = R_AREA(S_TRIANGLE_A(hr,hr)); -#elif RS == 4 -#define FOR_RESEARCH(r) S_TRIANGLE(r,hr,RINCR(r,x)) -const int r_area = R_AREA(S_TRIANGLE_A(hr,R)); -#elif RS == 3 -#define FOR_RESEARCH(r) S_DIAMOND(r,hr,RINCR(r,y)) -const int r_area = R_AREA(S_DIAMOND_A(hr,R)); -#elif RS == 2 -#define FOR_RESEARCH(r) S_VERTICAL(r,hr,RINCR(r,y)) -const int r_area = R_AREA(R); -#elif RS == 1 -#define FOR_RESEARCH(r) S_HORIZONTAL(r,hr,RINCR(r,x)) -const int r_area = R_AREA(R); -#elif RS == 0 -#define FOR_RESEARCH(r) S_SQUARE(r,hr,RINCR(r,y)) -const int r_area = R_AREA(R*R); -#endif - -#define RI1 (RI+1) -#define RFI1 (RFI+1) - -#if RI -#define FOR_ROTATION for (float ri = 0; ri < 360; ri+=360.0/RI1) -#else -#define FOR_ROTATION -#endif - -#if RFI -#define FOR_REFLECTION for (int rfi = 0; rfi < RFI1; rfi++) -#else -#define FOR_REFLECTION -#endif - -#if PD -#define PINCR DINCR -#else -#define PINCR(z,c) (z.c++) -#endif - -#define P_AREA(a) (a - PD) - -// patch shapes -#if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) -const int p_area = P_AREA(1); -#elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) -const int p_area = P_AREA(S_PLUS_A(hp,P)); -#elif PS == 6 -#define FOR_PATCH(p) 
S_SQUARE_EVEN(p,hp,PINCR(p,y)) -const int p_area = P_AREA(P*P); -#elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) -const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); -#elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) -const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); -#elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) -const int p_area = P_AREA(S_DIAMOND_A(hp,P)); -#elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) -const int p_area = P_AREA(P); -#elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) -const int p_area = P_AREA(P); -#elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) -const int p_area = P_AREA(P*P); -#endif - -const float r_scale = 1.0/r_area; -const float p_scale = 1.0/p_area; - -#define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) - -#if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) -#define gather_offs(off, off_arr) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, off_arr))) -#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off) * RF_LUMA_pt, 0) -#elif RF && D1W -#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) -#define gather_offs(off, off_arr) (RF_mul * vec4(textureGatherOffsets(RF_raw, RF_pos + vec2(off) * RF_pt, off_arr))) -#define gather(off) RF_gather(RF_pos + (off) * RF_pt, 0) -#elif RF -#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) -#else -#define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) -#define gather_offs(off, off_arr) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, off_arr))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) -#endif - -#if T -vec4 load(vec3 off) -{ - switch (int(off.z)) { - case 0: return load_(off); - } -} -vec4 load2(vec3 off) -{ - switch (int(off.z)) { - case 0: return load2_(off); - } -} -#else -#define load(off) load_(off) -#define load2(off) 
load2_(off) -#endif - -vec4 poi = load(vec3(0)); // pixel-of-interest -vec4 poi2 = load2(vec3(0)); // guide pixel-of-interest - -#if RI // rotation -vec2 rot(vec2 p, float d) -{ - return vec2( - p.x * cos(radians(d)) - p.y * sin(radians(d)), - p.y * sin(radians(d)) + p.x * cos(radians(d)) - ); -} -#else -#define rot(p, d) (p) -#endif - -#if RFI // reflection -vec2 ref(vec2 p, int d) -{ - switch (d) { - case 0: return p; - case 1: return p * vec2(1, -1); - case 2: return p * vec2(-1, 1); - } -} -#else -#define ref(p, d) (p) -#endif - -vec4 patch_comparison(vec3 r, vec3 r2) -{ - vec3 p; - vec4 min_rot = vec4(p_area); - - FOR_ROTATION FOR_REFLECTION { - vec4 pdiff_sq = vec4(0); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = load2(p + r2) - load2((transformed_p + r) * SF); - diff_sq *= diff_sq; -#if PST && P >= PST - float pdist = length(p.xy*PSD)*PSS; - pdist = exp(-(pdist*pdist)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); -#endif - pdiff_sq += diff_sq; - } - min_rot = min(min_rot, pdiff_sq); - } - - return min_rot * p_scale; -} - -#define NO_GATHER (PD == 0 && NG == 0) // never textureGather if any of these conditions are false -#define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) - -#if (defined(LUMA_gather) || D1W) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER -// 3x3 diamond/plus patch_comparison_gather -// XXX extend to support arbitrary sizes (probably requires code generation) -// XXX extend to support 3x3 square -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; -const ivec2 offsets_sf[4] = { ivec2(0,-1) * SF, ivec2(-1,0) * SF, ivec2(0,1) * SF, ivec2(1,0) * SF }; -vec4 poi_patch = gather_offs(0, offsets); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - float min_rot = p_area - 1; - vec4 transformer = gather_offs(r, offsets_sf); - FOR_ROTATION { - FOR_REFLECTION { - float diff_sq = dot((poi_patch - transformer) * 
(poi_patch - transformer), vec4(1)); - min_rot = min(diff_sq, min_rot); -#if RFI - switch(rfi) { - case 0: transformer = transformer.zyxw; break; - case 1: transformer = transformer.zwxy; break; // undoes last mirror, performs another mirror - case 2: transformer = transformer.zyxw; break; // undoes last mirror - } -#endif - } -#if RI == 3 - transformer = transformer.wxyz; -#elif RI == 1 - transformer = transformer.zwxy; -#endif - } - float center_diff_sq = poi2.x - load2(r).x; - center_diff_sq *= center_diff_sq; - return vec4(min_rot + center_diff_sq, 0, 0, 0) * p_scale; -} -#elif (defined(LUMA_gather) || D1W) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER -// tiled even square patch_comparison_gather -// XXX extend to support odd square? -// XXX rotations/reflections appear to be subtly broken -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - vec2 tile; - float min_rot = p_area; - - /* gather order: - * w z - * x y - */ - FOR_ROTATION FOR_REFLECTION { - float pdiff_sq = 0; - for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { - vec4 poi_patch = gather(tile + r2.xy); - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - -#if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees -#endif -#if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } -#endif - - vec4 diff_sq = (poi_patch - transformer) * (poi_patch - transformer); -#if PST && P >= PST - // XXX refactor to avoid pow (should probably break off into a function) - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq += dot(diff_sq, vec4(1)); - } - min_rot = min(min_rot, pdiff_sq); - } - 
- return vec4(min_rot, 0, 0, 0) * p_scale; -} -#else -#define patch_comparison_gather patch_comparison -#endif - -vec4 hook() -{ - vec4 total_weight = vec4(0); - vec4 sum = vec4(0); - vec4 result = vec4(0); - - vec3 r = vec3(0); - vec3 p = vec3(0); - vec3 me = vec3(0); - -#if T && ME == 1 // temporal & motion estimation - vec3 me_tmp = vec3(0); - float maxweight = 0; -#elif T && ME == 2 // temporal & motion estimation - vec3 me_sum = vec3(0); - float me_weight = 0; -#endif - -#if WD == 2 || M == 3 // weight discard, weighted median intensities - int r_index = 0; - vec4 all_weights[r_area]; - vec4 all_pixels[r_area]; -#elif WD == 1 // weight discard - vec4 no_weights = vec4(0); - vec4 discard_total_weight = vec4(0); - vec4 discard_sum = vec4(0); -#endif - -#if M == 1 // Euclidean medians - vec4 minsum = vec4(0); -#endif - - FOR_FRAME(r) { - // XXX ME is always a frame behind, should have to option to re-research after applying ME (could do it an arbitrary number of times per frame if desired) -#if T && ME == 1 // temporal & motion estimation max weight - if (r.z > 0) { - me += me_tmp; - me_tmp = vec3(0); - maxweight = 0; - } -#elif T && ME == 2 // temporal & motion estimation weighted average - if (r.z > 0) { - me += round(me_sum / me_weight); - me_sum = vec3(0); - me_weight = 0; - } -#endif - FOR_RESEARCH(r) { - // main NLM logic - const float h = S*0.013; - const float pdiff_scale = 1.0/(h*h); - vec4 pdiff_sq = (r.z == 0) ? 
patch_comparison_gather(r+me, vec3(0)) : patch_comparison(r+me, vec3(0)); - vec4 weight = exp(-pdiff_sq * pdiff_scale); - -#if T && ME == 1 // temporal & motion estimation max weight - me_tmp = vec3(r.xy,0) * step(maxweight, weight.x) + me_tmp * (1 - step(maxweight, weight.x)); - maxweight = max(maxweight, weight.x); -#elif T && ME == 2 // temporal & motion estimation weighted average - me_sum += vec3(r.xy,0) * weight.x; - me_weight += weight.x; -#endif - -#if D1W - weight = vec4(weight.x); -#endif - - weight *= exp(-(length(r*SD)*SS * length(r*SD)*SS)); // spatial kernel - -#if WD == 2 || M == 3 // weight discard, weighted median intensity - all_weights[r_index] = weight; - all_pixels[r_index] = load(r+me); - r_index++; -#elif WD == 1 // weight discard - vec4 wd_scale = 1.0/max(no_weights, 1); - vec4 keeps = step(total_weight*wd_scale * WDT*exp(-wd_scale*WDP), weight); - discard_sum += load(r+me) * weight * (1 - keeps); - discard_total_weight += weight * (1 - keeps); - no_weights += keeps; -#endif - - sum += load(r+me) * weight; - total_weight += weight; - -#if M == 1 // Euclidean median - // Based on: https://arxiv.org/abs/1207.3056 - // XXX might not work with ME - vec3 r2; - vec4 wpdist_sum = vec4(0); - FOR_FRAME(r2) FOR_RESEARCH(r2) { - vec4 pdist = (r.z + r2.z) == 0 ? patch_comparison_gather(r+me, r2+me) : patch_comparison(r+me, r2+me); - wpdist_sum += sqrt(pdist) * (1-weight); - } - - vec4 newmin = step(wpdist_sum, minsum); // wpdist_sum <= minsum - newmin *= 1 - step(wpdist_sum, vec4(0)); // && wpdist_sum > 0 - newmin += step(minsum, vec4(0)); // || minsum <= 0 - newmin = min(newmin, 1); - - minsum = (newmin * wpdist_sum) + ((1-newmin) * minsum); - result = (newmin * load(r+me)) + ((1-newmin) * result); -#endif - } // FOR_RESEARCH - } // FOR_FRAME - - // XXX optionally put the denoised pixel into the frame buffer? 
-#if T // temporal -#endif - - vec4 avg_weight = total_weight * r_scale; - vec4 old_avg_weight = avg_weight; - -#if WD == 2 // true average - total_weight = vec4(0); - sum = vec4(0); - vec4 no_weights = vec4(0); - - for (int i = 0; i < r_area; i++) { - vec4 keeps = step(avg_weight*WDT, all_weights[i]); - all_weights[i] *= keeps; - sum += all_pixels[i] * all_weights[i]; - total_weight += all_weights[i]; - no_weights += keeps; - } -#elif WD == 1 // moving cumulative average - total_weight -= discard_total_weight; - sum -= discard_sum; -#endif -#if WD // weight discard - avg_weight = total_weight / no_weights; -#endif - - total_weight += SW; - sum += poi * SW; - -#if M == 3 // weighted median intensity - const float hr_area = r_area/2.0; - vec4 is_median, gt, lt, gte, lte, neq; - - for (int i = 0; i < r_area; i++) { - gt = lt = vec4(0); - for (int j = 0; j < r_area; j++) { - gte = step(all_pixels[i]*all_weights[i], all_pixels[j]*all_weights[j]); - lte = step(all_pixels[j]*all_weights[j], all_pixels[i]*all_weights[i]); - neq = 1 - gte * lte; - gt += gte * neq; - lt += lte * neq; - } - is_median = step(gt, vec4(hr_area)) * step(lt, vec4(hr_area)); - result += step(result, vec4(0)) * is_median * all_pixels[i]; - } -#elif M == 2 // weight map - result = avg_weight; -#elif M == 0 // mean - result = sum / total_weight; -#endif - -#if ASW == 0 // pre-WD weights -#define AS_weight old_avg_weight -#elif ASW == 1 // post-WD weights -#define AS_weight avg_weight -#endif - -#if ASK == 0 - vec4 sharpening_strength = pow(AS_weight, vec4(ASP)); -#elif ASK == 1 -#define sigmoid(x) (tanh(x * 2*M_PI - M_PI)*0.5+0.5) - vec4 sharpening_strength = mix(pow(sigmoid(AS_weight), vec4(ASP)), - AS_weight, ASC); - // just in case ASC < 0 (will sharpen but it's janky XXX) - sharpening_strength = clamp(sharpening_strength, 0.0, 1.0); -#elif ASK == 2 - vec4 sharpening_strength = vec4(ASP); -#endif - - // XXX maybe allow for alternative blurs? e.g., replace result w/ load2? 
-#if AS == 1 // sharpen+denoise - vec4 sharpened = result + (poi - result) * ASF; -#elif AS == 2 // sharpen only - vec4 sharpened = poi + (poi - result) * ASF; -#endif - -#if EP // extremes preserve - float luminance = EP_texOff(0).x; - // EPSILON is needed since pow(0,0) is undefined - float ep_weight = pow(max(min(1-luminance, luminance)*2, EPSILON), (luminance < 0.5 ? DP : BP)); - result = mix(poi, result, ep_weight); -#endif - -#if AS == 1 // sharpen+denoise - result = mix(sharpened, result, sharpening_strength); -#elif AS == 2 // sharpen only - result = mix(sharpened, poi, sharpening_strength); -#endif - -#if M == 4 // edge map - result = sharpening_strength; -#endif - -#if (M == 2 || M == 4) && defined(CHROMA_raw) // drop chroma for weight maps - result = vec4(0.5); -#endif - -#if DV == 1 - result = clamp(abs(poi - result) * S, 0.0, 1.0); -#elif DV == 2 - result = (poi - result) * 0.5 + 0.5; -#endif - - return mix(poi, result, BF); -} - diff --git a/portable_config/shaders/nlmeans_hqx.glsl b/portable_config/shaders/nlmeans_hqx.glsl new file mode 100644 index 00000000..d9b0a96e --- /dev/null +++ b/portable_config/shaders/nlmeans_hqx.glsl @@ -0,0 +1,1288 @@ +/* vi: ft=c + * + * Based on vf_nlmeans.c from FFmpeg. + * + * Copyright (c) 2022 an3223 + * Copyright (c) 2016 Clément Bœsch + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 2.1 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see . 
+ */ + +// Description: nlmeans_hqx.glsl: Very slow, should offer the best quality. + +/* The recommended usage of this shader and its variant profiles is to add them + * to input.conf and then dispatch the appropriate shader via a keybind during + * media playback. Here is an example input.conf entry: + * + * F4 no-osd change-list glsl-shaders toggle "~~/shaders/nlmeans_luma.glsl"; show-text "Non-local means (LUMA only)" + * + * These shaders can also be enabled by default in mpv.conf, for example: + * + * glsl-shaders='~~/shaders/nlmeans.glsl' + * + * Both of the examples above assume the shaders are located in a subdirectory + * named "shaders" within mpv's config directory. Refer to the mpv + * documentation for more details. + * + * This shader is highly configurable via user variables below. Although the + * default settings should offer good quality at a reasonable speed, you are + * encouraged to tweak them to your preferences. Be mindful that certain + * settings may greatly affect speed. + * + * Denoising is most useful for noisy content. If there is no perceptible + * noise, you probably won't see a positive difference. + * + * The default settings are generally tuned for low noise and high detail + * preservation. The "medium" and "heavy" profiles are tuned for higher levels + * of noise. + * + * The denoiser will not work properly if the content has been upscaled + * beforehand (whether it was done by you or not). In such cases, consider + * issuing a command to downscale in the mpv console (backtick ` key): + * + * vf toggle scale=-2:720 + * + * ...replacing 720 with whatever resolution seems appropriate. Rerun the + * command to undo the downscale. It may take some trial-and-error to find the + * proper resolution. + */ + +/* Regarding speed + * + * Speed may vary wildly for different vo and gpu-api settings. Generally + * vo=gpu-next and gpu-api=vulkan are recommended for the best speed, but this + * may be different for your system. 
+ * + * If your GPU doesn't support textureGather, or if you are on a version of mpv + * prior to 0.35.0, then consider setting RI/RFI to 0, or try the LQ profile + * + * If you plan on tinkering with NLM's settings, read below: + * + * textureGather only applies to luma and is limited to these configurations: + * + * - PS={3,7}:P=3:PST=0:RI={0,1,3}:RFI={0,1,2} + * - Default, very fast, rotations and reflections should be free + * - If this is unusually slow then try changing gpu-api and vo + * - If it's still slow, try setting RI/RFI to 0. + * + * - PS=6:RI={0,1,3}:RFI={0,1,2} + * - Currently the only scalable variant + * - Patch shape is asymmetric on two axes + * - Rotations should have very little speed impact + * - Reflections may have a significant speed impact + * + * Options which always disable textureGather: + * - PD + * - NG + */ + +// The following is shader code injected from guided.glsl +/* vi: ft=c + * + * Copyright (c) 2022 an3223 + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 2.1 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +// Description: guided.glsl: Guided by the downscaled image + +/* The radius can be adjusted with the MEANI stage's downscaling factor. + * Higher numbers give a bigger radius. + * + * The E variable can be found in the A stage. + * + * The subsampling (fast guided filter) can be adjusted with the I stage's + * downscaling factor. Higher numbers are faster. 
+ * + * The guide's subsampling can be adjusted with the PREI stage's downscaling + * factor. Higher numbers downscale more. + */ + +//!HOOK LUMA +//!HOOK CHROMA +//!BIND HOOKED +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / +//!DESC Guided filter (PREI) +//!SAVE _INJ_PREI + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!BIND _INJ_PREI +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h +//!DESC Guided filter (I) +//!SAVE _INJ_I + +vec4 hook() +{ +return _INJ_PREI_texOff(0); +} + + +//!HOOK LUMA +//!HOOK CHROMA +//!DESC Guided filter (P) +//!BIND HOOKED +//!WIDTH _INJ_I.w +//!HEIGHT _INJ_I.h +//!SAVE _INJ_P + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!DESC Guided filter (MEANI) +//!BIND _INJ_I +//!WIDTH _INJ_I.w 1.5 / +//!HEIGHT _INJ_I.h 1.5 / +//!SAVE _INJ_MEANI + +vec4 hook() +{ +return _INJ_I_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!DESC Guided filter (MEANP) +//!BIND _INJ_P +//!WIDTH _INJ_MEANI.w +//!HEIGHT _INJ_MEANI.h +//!SAVE _INJ_MEANP + +vec4 hook() +{ +return _INJ_P_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!DESC Guided filter (_INJ_I_SQ) +//!BIND _INJ_I +//!WIDTH _INJ_I.w +//!HEIGHT _INJ_I.h +//!SAVE _INJ_I_SQ + +vec4 hook() +{ +return _INJ_I_texOff(0) * _INJ_I_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!DESC Guided filter (_INJ_IXP) +//!BIND _INJ_I +//!BIND _INJ_P +//!WIDTH _INJ_I.w +//!HEIGHT _INJ_I.h +//!SAVE _INJ_IXP + +vec4 hook() +{ +return _INJ_I_texOff(0) * _INJ_P_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!DESC Guided filter (CORRI) +//!BIND _INJ_I_SQ +//!WIDTH _INJ_MEANI.w +//!HEIGHT _INJ_MEANI.h +//!SAVE _INJ_CORRI + +vec4 hook() +{ +return _INJ_I_SQ_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!DESC Guided filter (CORRP) +//!BIND _INJ_IXP +//!WIDTH _INJ_MEANI.w +//!HEIGHT _INJ_MEANI.h +//!SAVE _INJ_CORRP + +vec4 hook() +{ +return _INJ_IXP_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!DESC Guided filter (A) +//!BIND _INJ_MEANI +//!BIND 
_INJ_MEANP +//!BIND _INJ_CORRI +//!BIND _INJ_CORRP +//!WIDTH _INJ_I.w +//!HEIGHT _INJ_I.h +//!SAVE _INJ_A + +#define E 0.0013 + +vec4 hook() +{ +vec4 var = _INJ_CORRI_texOff(0) - _INJ_MEANI_texOff(0) * _INJ_MEANI_texOff(0); +vec4 cov = _INJ_CORRP_texOff(0) - _INJ_MEANI_texOff(0) * _INJ_MEANP_texOff(0); + return cov / (var + E); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!DESC Guided filter (B) +//!BIND _INJ_A +//!BIND _INJ_MEANI +//!BIND _INJ_MEANP +//!WIDTH _INJ_I.w +//!HEIGHT _INJ_I.h +//!SAVE _INJ_B + +vec4 hook() +{ +return _INJ_MEANP_texOff(0) - _INJ_A_texOff(0) * _INJ_MEANI_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!DESC Guided filter (MEANA) +//!BIND _INJ_A +//!WIDTH _INJ_MEANI.w +//!HEIGHT _INJ_MEANI.h +//!SAVE _INJ_MEANA + +vec4 hook() +{ +return _INJ_A_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!DESC Guided filter (MEANB) +//!BIND _INJ_B +//!WIDTH _INJ_MEANI.w +//!HEIGHT _INJ_MEANI.h +//!SAVE _INJ_MEANB + +vec4 hook() +{ +return _INJ_B_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!DESC Guided filter +//!BIND HOOKED +//!BIND _INJ_MEANA +//!BIND _INJ_MEANB +//!SAVE RF_LUMA + +vec4 hook() +{ +return _INJ_MEANA_texOff(0) * HOOKED_texOff(0) + _INJ_MEANB_texOff(0); +} + +// End of source code injected from guided.glsl + +//!HOOK LUMA +//!HOOK CHROMA +//!BIND RF_LUMA +//!WIDTH RF_LUMA.w +//!HEIGHT RF_LUMA.h +//!DESC Non-local means (RF, share) +//!SAVE RF + +vec4 hook() +{ + return RF_LUMA_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!BIND LUMA +//!WIDTH LUMA.w 3 / +//!HEIGHT LUMA.h 3 / +//!DESC Non-local means (EP) +//!SAVE EP + +vec4 hook() +{ + return LUMA_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!BIND HOOKED +//!BIND RF_LUMA +//!BIND RF +//!BIND EP +//!DESC Non-local means (nlmeans_hqx.glsl) + +// User variables + +// It is generally preferable to denoise luma and chroma differently, so the +// user variables for luma and chroma are split. 
+ +// Denoising factor (level of blur, higher means more blur) +#ifdef LUMA_raw +#define S 2.25 +#else +#define S 5.0 +#endif + +/* Adaptive sharpening + * + * Uses the blur incurred by denoising to perform an unsharp mask, and uses the + * weight map to restrict the sharpening to edges. + * + * If you just want to increase/decrease sharpness then you want to change ASF. + * + * Use V=4 to visualize which areas are sharpened (black means sharpen). + * + * AS: + * - 0 to disable + * - 1 to sharpen+denoise + * - 2 to sharpen only + * ASF: Higher numbers make a sharper image + * ASP: Higher numbers use more of the sharp image + * ASW: + * - 0 to use pre-WD weights + * - 1 to use post-WD weights (ASP should be ~2x to compensate) + * ASK: Weight kernel: + * - 0 for power. This is the old method. + * - 1 for sigmoid. This is generally recommended. + * - 2 for constant (non-adaptive, w/ ASP=0 this sharpens the entire image) + * ASC (only for ASK=1, range 0-1): Reduces the contrast of the edge map + */ +#ifdef LUMA_raw +#define AS 0 +#define ASF 3.0 +#define ASP 1 +#define ASW 0 +#define ASK 1 +#define ASC 0.0 +#else +#define AS 0 +#define ASF 3.0 +#define ASP 1.0 +#define ASW 0 +#define ASK 1 +#define ASC 0.0 +#endif + +/* Starting weight + * + * Also known as the center weight. This represents the weight of the + * pixel-of-interest. Lower numbers may help handle heavy noise & ringing. + * + * EPSILON should be used instead of zero to avoid divide-by-zero errors. + */ +#ifdef LUMA_raw +#define SW 1.0 +#else +#define SW 0.5 +#endif + +/* Weight discard + * + * Discard weights that fall below a fraction of the average weight. This culls + * the most dissimilar samples from the blur, yielding a much more pleasant + * result, especially around edges. + * + * WD: + * - 2: True average. Better quality, but slower and requires GLSL 4.0 or later + * - 1: Moving cumulative average. Inaccurate, tends to blur directionally. 
+ * - 0: Disable + * + * WDT: Threshold coefficient, higher numbers discard more + * WDP (only for WD=1): Increasing reduces the threshold for small sample sizes + */ +#ifdef LUMA_raw +#define WD 2 +#define WDT 0.5 +#define WDP 6.0 +#else +#define WD 2 +#define WDT 0.75 +#define WDP 6.0 +#endif + +/* Extremes preserve + * + * Reduces denoising around very bright/dark areas. + * + * The downscaling factor of the EP shader stage affects what is considered a + * bright/dark area. The default of 3 should be fine; it's not recommended to + * change this. + * + * This is incompatible with RGB. If you have RGB hooks enabled then you will + * have to delete the EP shader stage or specify EP=0 through shader_cfg. + * + * EP: 1 to enable, 0 to disable + * DP: EP strength on dark patches, 0 to fully denoise + * BP: EP strength on bright patches, 0 to fully denoise + */ +#ifdef LUMA_raw +#define EP 1 +#define BP 0.75 +#define DP 0.25 +#else +#define EP 0 +#define BP 0.0 +#define DP 0.0 +#endif + +/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ +/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ +/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ +/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ +/* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ + +/* Patch & research sizes + * + * Patch size should be an odd number greater than or equal to 3. Higher values + * are slower and not always better. + * + * Research size should be an odd number greater than or equal to 3. Higher values are + * generally better, but slower, blurrier, and give diminishing returns. + */ +#ifdef LUMA_raw +#define P 5 +#define R 5 +#else +#define P 5 +#define R 5 +#endif + +/* Patch and research shapes + * + * Different shapes have different speed and quality characteristics. Every + * shape (besides square) is smaller than square. 
+ * + * PS applies to patches, RS applies to research zones. + * + * Be wary of gather optimizations (see the Regarding Speed comment at the top) + * + * 0: square (symmetrical) + * 1: horizontal line (asymmetric) + * 2: vertical line (asymmetric) + * 3: diamond (symmetrical) + * 4: triangle (asymmetric, pointing upward) + * 5: truncated triangle (asymmetric on two axes, last row halved) + * 6: even sized square (asymmetric on two axes) + * 7: plus (symmetrical) + */ +#ifdef LUMA_raw +#define RS 3 +#define PS 6 +#else +#define RS 3 +#define PS 3 +#endif + +/* Robust filtering + * + * This setting is dependent on code generation from shader_cfg, so this + * setting can only be enabled via shader_cfg. + * + * Compares the pixel-of-interest against a guide, which could be a downscaled + * image or the output of another shader + */ +#define RF_LUMA 1 +#define RF 1 + +/* Rotational/reflectional invariance + * + * Number of rotations/reflections to try for each patch comparison. Can be + * slow, but improves feature preservation. More rotations/reflections give + * diminishing returns. The most similar rotation/reflection will be used. + * + * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a + * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. + * + * RI: Rotational invariance + * RFI (0 to 2): Reflectional invariance + */ +#ifdef LUMA_raw +#define RI 0 +#define RFI 0 +#else +#define RI 0 +#define RFI 0 +#endif + +/* Temporal denoising + * + * This setting is dependent on code generation from shader_cfg, so this + * setting can only be enabled via shader_cfg. 
+ * + * Caveats: + * - Slower: + * - Each frame needs to be researched (more samples & more math) + * - Gather optimizations only apply to the current frame + * - Requires vo=gpu-next + * - Luma-only (this is a bug) + * - Buggy + * + * May cause motion blur and may struggle more with noise that persists across + * multiple frames (e.g., from compression or duplicate frames), but can work + * very well on high quality video. + * + * Motion estimation (ME) should improve quality without impacting speed. + * + * T: number of frames used + * ME: motion estimation, 0 for none, 1 for max weight, 2 for weighted avg + * MEF: estimate factor, compensates for ME being one frame behind + * TRF: compare against the denoised frames + */ +#ifdef LUMA_raw +#define T 0 +#define ME 1 +#define MEF 2 +#define TRF 0 +#else +#define T 0 +#define ME 0 +#define MEF 2 +#define TRF 0 +#endif + +/* Spatial kernel + * + * Increasing the spatial denoising factor (SS) reduces the weight of further + * pixels. + * + * Spatial distortion instructs the spatial kernel to view that axis as + * closer/further, for instance SD=(1,1,0.5) would make the temporal axis + * appear closer and increase blur between frames. + * + * The intra-patch variants are supposed to help with larger patch sizes. 
+ * + * SST: enables spatial kernel if R>=SST, 0 fully disables + * SS: spatial sigma + * SD: spatial distortion (X, Y, time) + * PSS: intra-patch spatial sigma + * PST: enables intra-patch spatial kernel if P>=PST, 0 fully disables + * PSD: intra-patch spatial distortion (X, Y) + */ +#ifdef LUMA_raw +#define SST 1 +#define SS 0.25 +#define SD vec3(1,1,1) +#define PST 0 +#define PSS 0.0 +#define PSD vec2(1,1) +#else +#define SST 1 +#define SS 0.25 +#define SD vec3(1,1,1) +#define PST 0 +#define PSS 0.0 +#define PSD vec2(1,1) +#endif + +/* Kernels + * + * SK: spatial kernel + * RK: range kernel (takes patch differences) + * PSK: intra-patch spatial kernel + * + * List of available kernels: + * + * bicubic + * cos + * gaussian + * lanczos + * quadratic + * sinc + * sphinx + */ +#ifdef LUMA_raw +#define SK gaussian +#define RK gaussian +#define PSK gaussian +#else +#define SK gaussian +#define RK gaussian +#define PSK gaussian +#endif + +// Scaling factor (should match WIDTH/HEIGHT) +#ifdef LUMA_raw +#define SF 1 +#else +#define SF 1 +#endif + +/* Visualization + * + * 0: off + * 1: absolute difference between input/output to the power of 0.25 + * 2: difference between input/output centered on 0.5 + * 3: avg_weight + * 4: edge map (based on the relevant AS settings) + */ +#ifdef LUMA_raw +#define V 0 +#else +#define V 0 +#endif + +// Blur factor (0.0 returns the input image, 1.0 returns the output image) +#ifdef LUMA_raw +#define BF 1.0 +#else +#define BF 1.0 +#endif + +// Force disable textureGather +#ifdef LUMA_raw +#define NG 0 +#else +#define NG 0 +#endif + +// Patch donut (probably useless) +#ifdef LUMA_raw +#define PD 0 +#else +#define PD 0 +#endif + +// Duplicate 1st weight (for luma-guided-chroma) +#ifdef LUMA_raw +#define D1W 0 +#else +#define D1W 0 +#endif + +// Skip patch comparison +#ifdef LUMA_raw +#define SKIP_PATCH 0 +#else +#define SKIP_PATCH 0 +#endif + +// Shader code + +#define EPSILON 0.00000000001 +#define M_PI 3.14159265358979323846 +#define 
POW2(x) ((x)*(x)) +#define POW3(x) ((x)*(x)*(x)) +#define bicubic(x) ((1.0/6.0) * (POW3((x)+2) - 4 * POW3((x)+1) + 6 * POW3(x) - 4 * POW3(max((x)-1, 0)))) +#define gaussian(x) exp(-1 * POW2(x)) +#define lanczos(x) POW2(sinc(x)) +#define quadratic(x) ((x) < 0.5 ? 0.75 - POW2(x) : 0.5 * POW2((x) - 1.5)) +#define sinc(x) ((x) < 1e-8 ? 1.0 : sin((x)*M_PI) / ((x)*M_PI)) +#define sphinx(x) ((x) < 1e-8 ? 1.0 : 3.0 * (sin((x)*M_PI) - (x)*M_PI * cos((x)*M_PI)) / POW3((x)*M_PI)) + +// XXX could maybe be better optimized on LGC +// XXX return original alpha component instead of 1.0 +#if defined(LUMA_raw) +#define val float +#define val_swizz(v) (v.x) +#define unval(v) vec4(v.x, 0, 0, 1.0) +#define val_packed val +#define val_pack(v) (v) +#define val_unpack(v) (v) +#elif defined(CHROMA_raw) +#define val vec2 +#define val_swizz(v) (v.xy) +#define unval(v) vec4(v.x, v.y, 0, 1.0) +#define val_packed uint +#define val_pack(v) packUnorm2x16(v) +#define val_unpack(v) unpackUnorm2x16(v) +#else +#define val vec3 +#define val_swizz(v) (v.xyz) +#define unval(v) vec4(v.x, v.y, v.z, 1.0) +#define val_packed val +#define val_pack(v) (v) +#define val_unpack(v) (v) +#endif + +#if PS == 6 +const int hp = P/2; +#else +const float hp = int(P/2) - 0.5*(1-(P%2)); // sample between pixels for even patch sizes +#endif + +#if RS == 6 +const int hr = R/2; +#else +const float hr = int(R/2) - 0.5*(1-(R%2)); // sample between pixels for even research sizes +#endif + +// donut increment, increments without landing on (0,0,0) +// much faster than a continue statement +#define DINCR(z,c) (z.c++,(z.c += int(z == vec3(0)))) + +// patch/research shapes +// each shape is depicted in a comment, where Z=5 (Z corresponds to P or R) +// dots (.) represent samples (pixels) and X represents the pixel-of-interest + +// Z ..... +// Z ..... +// Z ..X.. +// Z ..... +// Z ..... +#define S_SQUARE(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz; z.y <= hz; incr) + +// (in this instance Z=4) +// Z .... 
+// Z .... +// Z ..X. +// Z .... +#define S_SQUARE_EVEN(z,hz,incr) for (z.x = -hz; z.x < hz; z.x++) for (z.y = -hz; z.y < hz; incr) + +// Z-4 . +// Z-2 ... +// Z ..X.. +#define S_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz); incr) + +// Z-4 . +// Z-2 ... +// hz+1 ..X +#define S_TRUNC_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz)*int(z.y!=0); incr) +#define S_TRIANGLE_A(hz,Z) int(hz*hz+Z) + +// Z-4 . +// Z-2 ... +// Z ..X.. +// Z-2 ... +// Z-4 . +#define S_DIAMOND(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -abs(abs(z.x) - hz); z.y <= abs(abs(z.x) - hz); incr) +#define S_DIAMOND_A(hz,Z) int(hz*hz*2+Z) + +// +// Z ..X.. +// +#define S_HORIZONTAL(z,hz,incr) for (z.x = -hz; z.x <= hz; incr) for (z.y = 0; z.y <= 0; z.y++) + +// 90 degree rotation of S_HORIZONTAL +#define S_VERTICAL(z,hz,incr) for (z.x = 0; z.x <= 0; z.x++) for (z.y = -hz; z.y <= hz; incr) + +// 1 . +// 1 . +// Z ..X.. +// 1 . +// 1 . +#define S_PLUS(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz * int(z.x == 0); z.y <= hz * int(z.x == 0); incr) +#define S_PLUS_A(hz,Z) (Z*2 - 1) + +// XXX implement S_PLUS w/ an X overlayed: +// 3 . . . +// 3 ... +// Z ..X.. +// 3 ... +// 3 . . . + +// XXX implement an X shape: +// 2 . . +// 2 . . +// 1 X +// 2 . . +// 2 . . 
+ +// 1x1 square +#define S_1X1(z) for (z = vec3(0); z.x <= 0; z.x++) + +#define T1 (T+1) +#define FOR_FRAME(r) for (r.z = 0; r.z < T1; r.z++) + +#ifdef LUMA_raw +#define RF_ RF_LUMA +#else +#define RF_ RF +#endif + +// Skip comparing the pixel-of-interest against itself, unless RF is enabled +#if RF_ +#define RINCR(z,c) (z.c++) +#else +#define RINCR DINCR +#endif + +#define R_AREA(a) (a * T1 + RF_-1) + +// research shapes +// XXX would be nice to have the option of temporally-varying research sizes +#if R == 0 || R == 1 +#define FOR_RESEARCH(r) S_1X1(r) +const int r_area = R_AREA(1); +#elif RS == 7 +#define FOR_RESEARCH(r) S_PLUS(r,hr,RINCR(r,y)) +const int r_area = R_AREA(S_PLUS_A(hr,R)); +#elif RS == 6 +#define FOR_RESEARCH(r) S_SQUARE_EVEN(r,hr,RINCR(r,y)) +const int r_area = R_AREA(R*R); +#elif RS == 5 +#define FOR_RESEARCH(r) S_TRUNC_TRIANGLE(r,hr,RINCR(r,x)) +const int r_area = R_AREA(S_TRIANGLE_A(hr,hr)); +#elif RS == 4 +#define FOR_RESEARCH(r) S_TRIANGLE(r,hr,RINCR(r,x)) +const int r_area = R_AREA(S_TRIANGLE_A(hr,R)); +#elif RS == 3 +#define FOR_RESEARCH(r) S_DIAMOND(r,hr,RINCR(r,y)) +const int r_area = R_AREA(S_DIAMOND_A(hr,R)); +#elif RS == 2 +#define FOR_RESEARCH(r) S_VERTICAL(r,hr,RINCR(r,y)) +const int r_area = R_AREA(R); +#elif RS == 1 +#define FOR_RESEARCH(r) S_HORIZONTAL(r,hr,RINCR(r,x)) +const int r_area = R_AREA(R); +#elif RS == 0 +#define FOR_RESEARCH(r) S_SQUARE(r,hr,RINCR(r,y)) +const int r_area = R_AREA(R*R); +#endif + +#define RI1 (RI+1) +#define RFI1 (RFI+1) + +#if RI +#define FOR_ROTATION for (float ri = 0; ri < 360; ri+=360.0/RI1) +#else +#define FOR_ROTATION +#endif + +#if RFI +#define FOR_REFLECTION for (int rfi = 0; rfi < RFI1; rfi++) +#else +#define FOR_REFLECTION +#endif + +#if PD +#define PINCR DINCR +#else +#define PINCR(z,c) (z.c++) +#endif + +#define P_AREA(a) (a - PD) + +// patch shapes +#if P == 0 || P == 1 +#define FOR_PATCH(p) S_1X1(p) +const int p_area = P_AREA(1); +#elif PS == 7 +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) 
+const int p_area = P_AREA(S_PLUS_A(hp,P)); +#elif PS == 6 +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) +const int p_area = P_AREA(P*P); +#elif PS == 5 +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) +const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); +#elif PS == 4 +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) +const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); +#elif PS == 3 +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) +const int p_area = P_AREA(S_DIAMOND_A(hp,P)); +#elif PS == 2 +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) +const int p_area = P_AREA(P); +#elif PS == 1 +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) +const int p_area = P_AREA(P); +#elif PS == 0 +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) +const int p_area = P_AREA(P*P); +#endif + +const float r_scale = 1.0/r_area; +const float p_scale = 1.0/p_area; + +#define sample(tex, pos, size, pt, off) tex(pos + pt * (vec2(off) + 0.5 - fract(pos*size))) +#define load_(off) sample(HOOKED_tex, HOOKED_pos, HOOKED_size, HOOKED_pt, off) + +#if RF_ && defined(LUMA_raw) +#define load2_(off) sample(RF_LUMA_tex, RF_LUMA_pos, RF_LUMA_size, RF_LUMA_pt, off) +#define gather_offs(off, off_arr) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, off_arr))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off) * RF_LUMA_pt, 0) +#elif RF_ && D1W +#define load2_(off) sample(RF_tex, RF_pos, RF_size, RF_pt, off) +#define gather_offs(off, off_arr) (RF_mul * vec4(textureGatherOffsets(RF_raw, RF_pos + vec2(off) * RF_pt, off_arr))) +#define gather(off) RF_gather(RF_pos + (off) * RF_pt, 0) +#elif RF_ +#define load2_(off) sample(RF_tex, RF_pos, RF_size, RF_pt, off) +#else +#define load2_(off) load_(off) +#define gather_offs(off, off_arr) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, off_arr))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) +#endif + +#if T +val load(vec3 off) +{ + switch 
(min(int(off.z), frame)) { + case 0: return val_swizz(load_(off)); + + } +} +val load2(vec3 off) +{ + return off.z == 0 ? val_swizz(load2_(off)) : load(off); +} +#else +#define load(off) val_swizz(load_(off)) +#define load2(off) val_swizz(load2_(off)) +#endif + +val poi = load(vec3(0)); // pixel-of-interest +val poi2 = load2(vec3(0)); // guide pixel-of-interest + +#if RI // rotation: rot(p, d) rotates offset p about the origin by d degrees +vec2 rot(vec2 p, float d) +{ + return vec2( + p.x * cos(radians(d)) - p.y * sin(radians(d)), // x' = x*cos - y*sin + p.x * sin(radians(d)) + p.y * cos(radians(d)) // y' = x*sin + y*cos (was y*sin + x*cos, which is not a rotation) + ); +} +#else +#define rot(p, d) (p) +#endif + +#if RFI // reflection +vec2 ref(vec2 p, int d) +{ + switch (d) { + case 0: return p; + case 1: return p * vec2(1, -1); + case 2: return p * vec2(-1, 1); + } +} +#else +#define ref(p, d) (p) +#endif + +#if SST && R >= SST +float spatial_r(vec3 v) +{ + v.xy += 0.5 - fract(HOOKED_pos*HOOKED_size); + return SK(length(v*SD)*SS); +} +#else +#define spatial_r(v) (1) +#endif + +#if PST && P >= PST +#define spatial_p(v) PSK(length(v*PSD)*PSS) +#else +#define spatial_p(v) (1) +#endif + +val range(val pdiff_sq) +{ + const float h = S*0.013; + const float pdiff_scale = 1.0/(h*h); + pdiff_sq = sqrt(pdiff_sq * pdiff_scale); +#if defined(LUMA_raw) + return RK(pdiff_sq); +#elif defined(CHROMA_raw) + return vec2(RK(pdiff_sq.x), RK(pdiff_sq.y)); +#else + return vec3(RK(pdiff_sq.x), RK(pdiff_sq.y), RK(pdiff_sq.z)); +#endif + //return exp(-pdiff_sq * pdiff_scale); + + // weight function from the NLM paper, it's not very good + //return exp(-max(pdiff_sq - 2*S*S, 0.0) * pdiff_scale); +} + +val patch_comparison(vec3 r, vec3 r2) +{ + vec3 p; + val min_rot = val(p_area); + + FOR_ROTATION FOR_REFLECTION { + val pdiff_sq = val(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + val diff_sq = load2(p + r2) - load2((transformed_p + r) * SF); + diff_sq *= diff_sq; + diff_sq = 1 - (1 - diff_sq) * spatial_p(p.xy); + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); + } + + return min_rot * 
p_scale; +} + +#define NO_GATHER (PD == 0 && NG == 0) // never textureGather if any of these conditions are false +#define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) + +#if (defined(LUMA_gather) || D1W) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && REGULAR_ROTATIONS && NO_GATHER +// 3x3 diamond/plus patch_comparison_gather +// XXX extend to support arbitrary sizes (probably requires code generation) +// XXX extend to support 3x3 square +// XXX support PSS +const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; +const ivec2 offsets_sf[4] = { ivec2(0,-1) * SF, ivec2(-1,0) * SF, ivec2(0,1) * SF, ivec2(1,0) * SF }; +vec4 poi_patch = gather_offs(0, offsets); +float patch_comparison_gather(vec3 r, vec3 r2) +{ + float min_rot = p_area - 1; + vec4 transformer = gather_offs(r, offsets_sf); + FOR_ROTATION { + FOR_REFLECTION { + float diff_sq = dot((poi_patch - transformer) * (poi_patch - transformer), vec4(1)); + min_rot = min(diff_sq, min_rot); +#if RFI + switch(rfi) { + case 0: transformer = transformer.zyxw; break; + case 1: transformer = transformer.zwxy; break; // undoes last mirror, performs another mirror + case 2: transformer = transformer.zyxw; break; // undoes last mirror + } +#endif + } +#if RI == 3 + transformer = transformer.wxyz; +#elif RI == 1 + transformer = transformer.zwxy; +#endif + } + float center_diff_sq = poi2.x - load2(r).x; + center_diff_sq *= center_diff_sq; + return (min_rot + center_diff_sq) * p_scale; +} +#elif (defined(LUMA_gather) || D1W) && PS == 6 && RI == 0 && RFI == 0 && NO_GATHER +// tiled even square patch_comparison_gather +// XXX extend to support odd square? 
+float patch_comparison_gather(vec3 r, vec3 r2) +{ + vec2 tile; + float min_rot = p_area; + + /* gather order: + * w z + * x y + */ + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { + vec4 diff_sq = gather(tile + r.xy) - gather(tile + r2.xy); + diff_sq *= diff_sq; + diff_sq = 1 - (1 - diff_sq) * vec4(spatial_p(tile+vec2(0,1)), spatial_p(tile+vec2(1,1)), + spatial_p(tile+vec2(1,0)), spatial_p(tile+vec2(0,0))); + pdiff_sq += dot(diff_sq, vec4(1)); + } + min_rot = min(min_rot, pdiff_sq); + + return min_rot * p_scale; +} +#else +#define patch_comparison_gather patch_comparison +#endif + +vec4 hook() +{ + val total_weight = val(0); + val sum = val(0); + val result = val(0); + + vec3 r = vec3(0); + vec3 p = vec3(0); + vec3 me = vec3(0); + +#if T && ME == 1 // temporal & motion estimation + vec3 me_tmp = vec3(0); + float maxweight = 0; +#elif T && ME == 2 // temporal & motion estimation + vec3 me_sum = vec3(0); + float me_weight = 0; +#endif + +#if WD == 2 // weight discard + int r_index = 0; + val_packed all_weights[r_area]; + val_packed all_pixels[r_area]; +#elif WD == 1 // weight discard + val no_weights = val(0); + val discard_total_weight = val(0); + val discard_sum = val(0); +#endif + + FOR_FRAME(r) { + // XXX ME is always a frame behind, should have to option to re-research after applying ME (could do it an arbitrary number of times per frame if desired) +#if T && ME == 1 // temporal & motion estimation max weight + if (r.z > 0) { + me += me_tmp * MEF; + me_tmp = vec3(0); + maxweight = 0; + } +#elif T && ME == 2 // temporal & motion estimation weighted average + if (r.z > 0) { + me += round(me_sum / me_weight * MEF); + me_sum = vec3(0); + me_weight = 0; + } +#endif + FOR_RESEARCH(r) { // main NLM logic +#if SKIP_PATCH + val weight = val(1); +#else + val pdiff_sq = (r.z == 0) ? 
val(patch_comparison_gather(r+me, vec3(0))) : patch_comparison(r+me, vec3(0)); + val weight = range(pdiff_sq); +#endif + +#if T && ME == 1 // temporal & motion estimation max weight + me_tmp = vec3(r.xy,0) * step(maxweight, weight.x) + me_tmp * (1 - step(maxweight, weight.x)); + maxweight = max(maxweight, weight.x); +#elif T && ME == 2 // temporal & motion estimation weighted average + me_sum += vec3(r.xy,0) * weight.x; + me_weight += weight.x; +#endif + +#if D1W + weight = val(weight.x); +#endif + + weight *= spatial_r(r); + +#if WD == 2 // weight discard + all_weights[r_index] = val_pack(weight); + all_pixels[r_index] = val_pack(load(r+me)); + r_index++; +#elif WD == 1 // weight discard + val wd_scale = 1.0/max(no_weights, 1); + val keeps = step(total_weight*wd_scale * WDT*exp(-wd_scale*WDP), weight); + discard_sum += load(r+me) * weight * (1 - keeps); + discard_total_weight += weight * (1 - keeps); + no_weights += keeps; +#endif + + sum += load(r+me) * weight; + total_weight += weight; + } // FOR_RESEARCH + } // FOR_FRAME + + val avg_weight = total_weight * r_scale; + val old_avg_weight = avg_weight; + +#if WD == 2 // true average + total_weight = val(0); + sum = val(0); + val no_weights = val(0); + + for (int i = 0; i < r_area; i++) { + val w = val_unpack(all_weights[i]); + val px = val_unpack(all_pixels[i]); + val keeps = step(avg_weight*WDT, w); + + w *= keeps; + sum += px * w; + total_weight += w; + no_weights += keeps; + } +#elif WD == 1 // moving cumulative average + total_weight -= discard_total_weight; + sum -= discard_sum; +#endif +#if WD // weight discard + avg_weight = total_weight / no_weights; +#endif + + total_weight += SW * spatial_r(vec3(0)); + sum += poi * SW * spatial_r(vec3(0)); + +#if V == 3 // weight map + result = val(avg_weight); +#else // mean + result = val(sum / total_weight); +#endif + + // store frames for temporal +#if T > 1 + +#endif +#if T && TRF + imageStore(PREV1, ivec2(HOOKED_pos*imageSize(PREV1)), unval(result)); +#elif T + 
imageStore(PREV1, ivec2(HOOKED_pos*imageSize(PREV1)), unval(poi2)); +#endif + +#if ASW == 0 // pre-WD weights +#define AS_weight old_avg_weight +#elif ASW == 1 // post-WD weights +#define AS_weight avg_weight +#endif + +#if ASK == 0 + val sharpening_strength = pow(AS_weight, val(ASP)); +#elif ASK == 1 + val sharpening_strength = mix( + pow(smoothstep(0.0, 1.0, AS_weight), val(ASP)), + AS_weight, ASC); + // XXX normalize the result to account for a negative ASC? +#elif ASK == 2 + val sharpening_strength = val(ASP); +#endif + +#if AS == 1 // sharpen+denoise + val sharpened = result + (poi - result) * ASF; +#elif AS == 2 // sharpen only + val sharpened = poi + (poi - result) * ASF; +#endif + +#if EP // extremes preserve + float luminance = EP_texOff(0).x; + // EPSILON is needed since pow(0,0) is undefined + float ep_weight = pow(max(min(1-luminance, luminance)*2, EPSILON), (luminance < 0.5 ? DP : BP)); + result = mix(poi, result, ep_weight); +#endif + +#if AS == 1 // sharpen+denoise + result = mix(sharpened, result, sharpening_strength); +#elif AS == 2 // sharpen only + result = mix(sharpened, poi, sharpening_strength); +#endif + +#if V == 4 // edge map + result = sharpening_strength; +#endif + +#if (V == 3 || V == 4) && defined(CHROMA_raw) // drop chroma for these visualizations + return vec4(0.5); +#endif + +#if V == 1 + result = clamp(pow(abs(poi - result), val(0.25)), 0.0, 1.0); +#elif V == 2 + result = (poi - result) * 0.5 + 0.5; +#endif + + return unval(mix(poi, result, BF)); +} + diff --git a/portable_config/shaders/nlmeans_lgc.glsl b/portable_config/shaders/nlmeans_lgc.glsl index f58842ce..384d3a88 100644 --- a/portable_config/shaders/nlmeans_lgc.glsl +++ b/portable_config/shaders/nlmeans_lgc.glsl @@ -19,7 +19,7 @@ * along with this program. If not, see . 
*/ -// Profile description: Experimental luma-guided chroma denoising, kinda similar to KrigBilateral +// Description: nlmeans_lgc.glsl: Experimental luma-guided chroma denoising, kinda similar to KrigBilateral /* The recommended usage of this shader and its variant profiles is to add them * to input.conf and then dispatch the appropriate shader via a keybind during @@ -48,8 +48,8 @@ * of noise. * * The denoiser will not work properly if the content has been upscaled - * beforehand, whether it was done by you or someone down the line. Consider - * issuing a command to downscale in the mpv console, like so: + * beforehand (whether it was done by you or not). In such cases, consider + * issuing a command to downscale in the mpv console (backtick ` key): * * vf toggle scale=-2:720 * @@ -65,12 +65,13 @@ * may be different for your system. * * If your GPU doesn't support textureGather, or if you are on a version of mpv - * prior to 0.35.0, then consider setting RI/RFI to 0, or try the LQ and VLQ - * profiles. + * prior to 0.35.0, then consider setting RI/RFI to 0, or try the LQ profile * - * textureGather is LUMA only and limited to the following configurations: + * If you plan on tinkering with NLM's settings, read below: * - * - PS={3,7}:P=3:PST=0:RI={0,1,3}:RFI={0,1,2}:M!=1 + * textureGather only applies to luma and limited to the these configurations: + * + * - PS={3,7}:P=3:PST=0:RI={0,1,3}:RFI={0,1,2} * - Default, very fast, rotations and reflections should be free * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. 
@@ -83,23 +84,14 @@ * * Options which always disable textureGather: * - PD + * - NG */ //!HOOK CHROMA -//!DESC Non-local means (downscale) -//!WIDTH LUMA.w 3 / -//!HEIGHT LUMA.h 3 / -//!BIND LUMA -//!SAVE EP - -vec4 hook() -{ - return LUMA_texOff(0); -} - -//!HOOK CHROMA -//!DESC Non-local means (share) //!BIND LUMA +//!WIDTH LUMA.w +//!HEIGHT LUMA.h +//!DESC Non-local means (RF, share) //!SAVE RF vec4 hook() @@ -109,42 +101,19 @@ vec4 hook() //!HOOK CHROMA //!BIND HOOKED -//!BIND EP //!BIND RF //!DESC Non-local means (nlmeans_lgc.glsl) -//!WIDTH LUMA.w -//!HEIGHT LUMA.h -/* User variables - * - * It is usually preferable to denoise chroma and luma differently, so the user - * variables for luma and chroma are split. - */ +// User variables -/* S = denoising factor - * P = patch size - * R = research size - * - * The denoising factor controls the level of blur, higher is blurrier. - * - * Patch size should usually be an odd number greater than or equal to 3. - * Higher values are slower and not always better. - * - * Research size usually be an odd number greater than or equal to 3. Higher - * values are usually better, but slower and offer diminishing returns. - * - * Even-numbered patch/research sizes will sample between pixels unless PS=6. - * It's not known whether this is ever useful behavior or not. This is - * incompatible with textureGather optimizations, so NG=1 to disable them. - */ +// It is generally preferable to denoise luma and chroma differently, so the +// user variables for luma and chroma are split. + +// Denoising factor (level of blur, higher means more blur) #ifdef LUMA_raw -#define S 2.0 -#define P 3 -#define R 5 +#define S 11.66 #else #define S 11.66 -#define P 3 -#define R 5 #endif /* Adaptive sharpening @@ -152,11 +121,16 @@ vec4 hook() * Uses the blur incurred by denoising to perform an unsharp mask, and uses the * weight map to restrict the sharpening to edges. * - * Use M=4 to get a good look at which areas are/aren't sharpened. 
+ * If you just want to increase/decrease sharpness then you want to change ASF. * - * AS: 2 for sharpening, 1 for sharpening+denoising, 0 to disable - * ASF: Sharpening factor, higher numbers make a sharper underlying image - * ASP: Weight power, higher numbers use more of the sharp image + * Use V=4 to visualize which areas are sharpened (black means sharpen). + * + * AS: + * - 0 to disable + * - 1 to sharpen+denoise + * - 2 to sharpen only + * ASF: Higher numbers make a sharper image + * ASP: Higher numbers use more of the sharp image * ASW: * - 0 to use pre-WD weights * - 1 to use post-WD weights (ASP should be ~2x to compensate) @@ -168,15 +142,15 @@ vec4 hook() */ #ifdef LUMA_raw #define AS 0 -#define ASF 2.0 -#define ASP 4.0 +#define ASF 3.0 +#define ASP 1.0 #define ASW 0 #define ASK 1 #define ASC 0.0 #else #define AS 0 -#define ASF 2.0 -#define ASP 4.0 +#define ASF 3.0 +#define ASP 1.0 #define ASW 0 #define ASK 1 #define ASC 0.0 @@ -184,15 +158,13 @@ vec4 hook() /* Starting weight * - * Lower numbers give less weight to the pixel-of-interest, which may help - * handle higher noise levels, ringing, and may be useful for other things too? + * Also known as the center weight. This represents the weight of the + * pixel-of-interest. Lower numbers may help handle heavy noise & ringing. * - * EPSILON should be used instead of zero to avoid divide-by-zero errors. The - * avg_weight/old_avg_weight variables may be used to make SW adapt to the - * local noise level, e.g., SW=max(avg_weight, EPSILON) + * EPSILON should be used instead of zero to avoid divide-by-zero errors. */ #ifdef LUMA_raw -#define SW 1.0 +#define SW 0.75 #else #define SW 0.75 #endif @@ -204,7 +176,7 @@ vec4 hook() * result, especially around edges. * * WD: - * - 2: True average. Very good quality, but slower and uses more memory. + * - 2: True average. Better quality, but slower and requires GLSL 4.0 or later * - 1: Moving cumulative average. Inaccurate, tends to blur directionally. 
* 0: Disable * @@ -212,7 +184,7 @@ vec4 hook() * WDP (only for WD=1): Increasing reduces the threshold for small sample sizes */ #ifdef LUMA_raw -#define WD 2 +#define WD 0 #define WDT 0.5 #define WDP 6.0 #else @@ -223,19 +195,21 @@ vec4 hook() /* Extremes preserve * - * Reduces denoising around very bright/dark areas. The downscaling factor of - * EP (located near the top of this shader) controls the area sampled for - * luminance (higher numbers consider more area). + * Reduces denoising around very bright/dark areas. + * + * The downscaling factor of the EP shader stage affects what is considered a + * bright/dark area. The default of 3 should be fine, it's not recommended to + * change this. * * This is incompatible with RGB. If you have RGB hooks enabled then you will - * have to delete the EP shader stage or specify EP=0 through nlmeans_cfg. + * have to delete the EP shader stage or specify EP=0 through shader_cfg. * * EP: 1 to enable, 0 to disable * DP: EP strength on dark patches, 0 to fully denoise * BP: EP strength on bright patches, 0 to fully denoise */ #ifdef LUMA_raw -#define EP 1 +#define EP 0 #define BP 0.75 #define DP 0.25 #else @@ -250,25 +224,26 @@ vec4 hook() /* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ /* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ -/* Robust filtering +/* Patch & research sizes * - * This setting is dependent on code generation from nlmeans_cfg, so this - * setting can only be enabled via nlmeans_cfg. + * Patch size should be an odd number greater than or equal to 3. Higher values + * are slower and not always better. * - * Compares the pixel-of-interest against a guide, which could be a downscaled - * image or the output of another shader such as guided.glsl + * Research size should be an odd number greater than or equal to 3. Higher values are + * generally better, but slower, blurrier, and give diminishing returns. 
*/ #ifdef LUMA_raw -#define RF 0 +#define P 3 +#define R 5 #else -#define RF 1 +#define P 3 +#define R 5 #endif -/* Search shape +/* Patch and research shapes * - * Determines the shape of patches and research zones. Different shapes have - * different speed and quality characteristics. Every shape (besides square) is - * smaller than square. + * Different shapes have different speed and quality characteristics. Every + * shape (besides square) is smaller than square. * * PS applies applies to patches, RS applies to research zones. * @@ -291,11 +266,22 @@ vec4 hook() #define PS 3 #endif +/* Robust filtering + * + * This setting is dependent on code generation from shader_cfg, so this + * setting can only be enabled via shader_cfg. + * + * Compares the pixel-of-interest against a guide, which could be a downscaled + * image or the output of another shader + */ +#define RF_LUMA 0 +#define RF 1 + /* Rotational/reflectional invariance * - * Number of rotations/reflections to try for each patch comparison. Slow, but - * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. The most similar rotation/reflection will be used. + * Number of rotations/reflections to try for each patch comparison. Can be + * slow, but improves feature preservation. More rotations/reflections gives + * diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -312,29 +298,39 @@ vec4 hook() #endif /* Temporal denoising + * + * This setting is dependent on code generation from shader_cfg, so this + * setting can only be enabled via shader_cfg. 
* * Caveats: - * - Slower, each frame needs to be researched - * - Requires vo=gpu-next and nlmeans_temporal.glsl + * - Slower: + * - Each frame needs to be researched (more samples & more math) + * - Gather optimizations only apply to the current frame + * - Requires vo=gpu-next * - Luma-only (this is a bug) * - Buggy * - * Gather samples across multiple frames. May cause motion blur and may - * struggle more with noise that persists across multiple frames (e.g., from - * compression or duplicate frames), but can work very well on high quality - * video. + * May cause motion blur and may struggle more with noise that persists across + * multiple frames (e.g., from compression or duplicate frames), but can work + * very well on high quality video. * * Motion estimation (ME) should improve quality without impacting speed. * * T: number of frames used * ME: motion estimation, 0 for none, 1 for max weight, 2 for weighted avg + * MEF: estimate factor, compensates for ME being one frame behind + * TRF: compare against the denoised frames */ #ifdef LUMA_raw #define T 0 #define ME 1 +#define MEF 2 +#define TRF 0 #else #define T 0 #define ME 0 +#define MEF 2 +#define TRF 0 #endif /* Spatial kernel @@ -346,69 +342,79 @@ vec4 hook() * closer/further, for instance SD=(1,1,0.5) would make the temporal axis * appear closer and increase blur between frames. * - * The intra-patch variants do not yet have well-understood effects. They are - * intended to make large patch sizes more useful. Likely slower. + * The intra-patch variants are supposed to help with larger patch sizes. 
* - * SS: spatial denoising factor + * SST: enables spatial kernel if R>=PST, 0 fully disables + * SS: spatial sigma * SD: spatial distortion (X, Y, time) - * PSS: intra-patch spatial denoising factor + * PSS: intra-patch spatial sigma * PST: enables intra-patch spatial kernel if P>=PST, 0 fully disables * PSD: intra-patch spatial distortion (X, Y) */ #ifdef LUMA_raw +#define SST 1 #define SS 0.25 -#define SD vec3(1,1,1.5) +#define SD vec3(1,1,1) #define PST 0 #define PSS 0.0 #define PSD vec2(1,1) #else +#define SST 1 #define SS 0.25 -#define SD vec3(1,1,1.5) +#define SD vec3(1,1,1) #define PST 0 #define PSS 0.0 #define PSD vec2(1,1) #endif -// Scaling factor (should match WIDTH/HEIGHT) +/* Kernels + * + * SK: spatial kernel + * RK: range kernel (takes patch differences) + * PSK: intra-patch spatial kernel + * + * List of available kernels: + * + * bicubic + * cos + * gaussian + * lanczos + * quadratic + * sinc + * sphinx + */ #ifdef LUMA_raw -#define SF 1 +#define SK gaussian +#define RK gaussian +#define PSK gaussian #else -#define SF 1 +#define SK gaussian +#define RK gaussian +#define PSK gaussian #endif -/* Estimator - * - * 0: means - * 1: Euclidean medians (extremely slow, may be good for heavy noise) - * 2: weight map (not a denoiser, maybe useful for generating image masks) - * 3: weighted median intensity (slow, may be good for heavy noise) - * 4: edge map (based on the relevant AS settings) - */ +// Scaling factor (should match WIDTH/HEIGHT) #ifdef LUMA_raw -#define M 0 +#define SF 1 #else -#define M 0 +#define SF 1 #endif -/* Difference visualization - * - * Visualizes the difference between input/output image +/* Visualization * * 0: off - * 1: absolute difference scaled by S - * 2: difference centered on 0.5 + * 1: absolute difference between input/output to the power of 0.25 + * 2: difference between input/output centered on 0.5 + * 3: avg_weight + * 4: edge map (based on the relevant AS settings) */ #ifdef LUMA_raw -#define DV 0 +#define V 0 #else 
-#define DV 0 +#define V 0 #endif -/* Blur factor - * - * 0 to 1, only useful for alternative estimators. You're probably looking for - * "S" (denoising factor), go back to the top of the shader! - */ +// Blur factor (0.0 returns the input image, 1.0 returns the output image) #ifdef LUMA_raw #define BF 1.0 #else @@ -429,17 +435,57 @@ vec4 hook() #define PD 0 #endif -// Duplicate 1st weight (for LGC) +// Duplicate 1st weight (for luma-guided-chroma) #ifdef LUMA_raw -#define D1W 0 +#define D1W 1 #else #define D1W 1 #endif -/* Shader code */ +// Skip patch comparison +#ifdef LUMA_raw +#define SKIP_PATCH 0 +#else +#define SKIP_PATCH 0 +#endif + +// Shader code #define EPSILON 0.00000000001 #define M_PI 3.14159265358979323846 +#define POW2(x) ((x)*(x)) +#define POW3(x) ((x)*(x)*(x)) +#define bicubic(x) ((1.0/6.0) * (POW3((x)+2) - 4 * POW3((x)+1) + 6 * POW3(x) - 4 * POW3(max((x)-1, 0)))) +#define gaussian(x) exp(-1 * POW2(x)) +#define lanczos(x) POW2(sinc(x)) +#define quadratic(x) ((x) < 0.5 ? 0.75 - POW2(x) : 0.5 * POW2((x) - 1.5)) +#define sinc(x) ((x) < 1e-8 ? 1.0 : sin((x)*M_PI) / ((x)*M_PI)) +#define sphinx(x) ((x) < 1e-8 ? 
1.0 : 3.0 * (sin((x)*M_PI) - (x)*M_PI * cos((x)*M_PI)) / POW3((x)*M_PI)) + +// XXX could maybe be better optimized on LGC +// XXX return original alpha component instead of 1.0 +#if defined(LUMA_raw) +#define val float +#define val_swizz(v) (v.x) +#define unval(v) vec4(v.x, 0, 0, 1.0) +#define val_packed val +#define val_pack(v) (v) +#define val_unpack(v) (v) +#elif defined(CHROMA_raw) +#define val vec2 +#define val_swizz(v) (v.xy) +#define unval(v) vec4(v.x, v.y, 0, 1.0) +#define val_packed uint +#define val_pack(v) packUnorm2x16(v) +#define val_unpack(v) unpackUnorm2x16(v) +#else +#define val vec3 +#define val_swizz(v) (v.xyz) +#define unval(v) vec4(v.x, v.y, v.z, 1.0) +#define val_packed val +#define val_pack(v) (v) +#define val_unpack(v) (v) +#endif #if PS == 6 const int hp = P/2; @@ -454,39 +500,96 @@ const float hr = int(R/2) - 0.5*(1-(R%2)); // sample between pixels for even res #endif // donut increment, increments without landing on (0,0,0) -// much faster than a "continue" statement +// much faster than a continue statement #define DINCR(z,c) (z.c++,(z.c += int(z == vec3(0)))) -// search shapes and their corresponding areas -#define S_1X1(z) for (z = vec3(0); z.x <= 0; z.x++) +// patch/research shapes +// each shape is depicted in a comment, where Z=5 (Z corresponds to P or R) +// dots (.) represent samples (pixels) and X represents the pixel-of-interest + +// Z ..... +// Z ..... +// Z ..X.. +// Z ..... +// Z ..... +#define S_SQUARE(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz; z.y <= hz; incr) +// (in this instance Z=4) +// Z .... +// Z .... +// Z ..X. +// Z .... +#define S_SQUARE_EVEN(z,hz,incr) for (z.x = -hz; z.x < hz; z.x++) for (z.y = -hz; z.y < hz; incr) + +// Z-4 . +// Z-2 ... +// Z ..X.. #define S_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz); incr) + +// Z-4 . +// Z-2 ... 
+// hz+1 ..X #define S_TRUNC_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz)*int(z.y!=0); incr) #define S_TRIANGLE_A(hz,Z) int(hz*hz+Z) +// Z-4 . +// Z-2 ... +// Z ..X.. +// Z-2 ... +// Z-4 . #define S_DIAMOND(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -abs(abs(z.x) - hz); z.y <= abs(abs(z.x) - hz); incr) #define S_DIAMOND_A(hz,Z) int(hz*hz*2+Z) -#define S_VERTICAL(z,hz,incr) for (z.x = 0; z.x <= 0; z.x++) for (z.y = -hz; z.y <= hz; incr) +// +// Z ..X.. +// #define S_HORIZONTAL(z,hz,incr) for (z.x = -hz; z.x <= hz; incr) for (z.y = 0; z.y <= 0; z.y++) +// 90 degree rotation of S_HORIZONTAL +#define S_VERTICAL(z,hz,incr) for (z.x = 0; z.x <= 0; z.x++) for (z.y = -hz; z.y <= hz; incr) + +// 1 . +// 1 . +// Z ..X.. +// 1 . +// 1 . #define S_PLUS(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz * int(z.x == 0); z.y <= hz * int(z.x == 0); incr) #define S_PLUS_A(hz,Z) (Z*2 - 1) -#define S_SQUARE(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz; z.y <= hz; incr) -#define S_SQUARE_EVEN(z,hz,incr) for (z.x = -hz; z.x < hz; z.x++) for (z.y = -hz; z.y < hz; incr) +// XXX implement S_PLUS w/ an X overlayed: +// 3 . . . +// 3 ... +// Z ..X.. +// 3 ... +// 3 . . . + +// XXX implement an X shape: +// 2 . . +// 2 . . +// 1 X +// 2 . . +// 2 . . 
+ +// 1x1 square +#define S_1X1(z) for (z = vec3(0); z.x <= 0; z.x++) #define T1 (T+1) #define FOR_FRAME(r) for (r.z = 0; r.z < T1; r.z++) +#ifdef LUMA_raw +#define RF_ RF_LUMA +#else +#define RF_ RF +#endif + // Skip comparing the pixel-of-interest against itself, unless RF is enabled -#if RF +#if RF_ #define RINCR(z,c) (z.c++) #else #define RINCR DINCR #endif -#define R_AREA(a) (a * T1 + RF-1) +#define R_AREA(a) (a * T1 + RF_-1) // research shapes // XXX would be nice to have the option of temporally-varying research sizes @@ -575,44 +678,44 @@ const int p_area = P_AREA(P*P); const float r_scale = 1.0/r_area; const float p_scale = 1.0/p_area; -#define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define sample(tex, pos, size, pt, off) tex(pos + pt * (vec2(off) + 0.5 - fract(pos*size))) +#define load_(off) sample(HOOKED_tex, HOOKED_pos, HOOKED_size, HOOKED_pt, off) -#if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#if RF_ && defined(LUMA_raw) +#define load2_(off) sample(RF_LUMA_tex, RF_LUMA_pos, RF_LUMA_size, RF_LUMA_pt, off) #define gather_offs(off, off_arr) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, off_arr))) #define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off) * RF_LUMA_pt, 0) -#elif RF && D1W -#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) +#elif RF_ && D1W +#define load2_(off) sample(RF_tex, RF_pos, RF_size, RF_pt, off) #define gather_offs(off, off_arr) (RF_mul * vec4(textureGatherOffsets(RF_raw, RF_pos + vec2(off) * RF_pt, off_arr))) #define gather(off) RF_gather(RF_pos + (off) * RF_pt, 0) -#elif RF -#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) +#elif RF_ +#define load2_(off) sample(RF_tex, RF_pos, RF_size, RF_pt, off) #else -#define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) load_(off) #define gather_offs(off, off_arr) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + 
vec2(off) * HOOKED_pt, off_arr))) #define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T -vec4 load(vec3 off) +val load(vec3 off) { - switch (int(off.z)) { - case 0: return load_(off); + switch (min(int(off.z), frame)) { + case 0: return val_swizz(load_(off)); + } } -vec4 load2(vec3 off) +val load2(vec3 off) { - switch (int(off.z)) { - case 0: return load2_(off); - } + return off.z == 0 ? val_swizz(load2_(off)) : load(off); } #else -#define load(off) load_(off) -#define load2(off) load2_(off) +#define load(off) val_swizz(load_(off)) +#define load2(off) val_swizz(load2_(off)) #endif -vec4 poi = load(vec3(0)); // pixel-of-interest -vec4 poi2 = load2(vec3(0)); // guide pixel-of-interest +val poi = load(vec3(0)); // pixel-of-interest +val poi2 = load2(vec3(0)); // guide pixel-of-interest #if RI // rotation vec2 rot(vec2 p, float d) @@ -639,22 +742,52 @@ vec2 ref(vec2 p, int d) #define ref(p, d) (p) #endif -vec4 patch_comparison(vec3 r, vec3 r2) +#if SST && R >= SST +float spatial_r(vec3 v) +{ + v.xy += 0.5 - fract(HOOKED_pos*HOOKED_size); + return SK(length(v*SD)*SS); +} +#else +#define spatial_r(v) (1) +#endif + +#if PST && P >= PST +#define spatial_p(v) PSK(length(v*PSD)*PSS) +#else +#define spatial_p(v) (1) +#endif + +val range(val pdiff_sq) +{ + const float h = S*0.013; + const float pdiff_scale = 1.0/(h*h); + pdiff_sq = sqrt(pdiff_sq * pdiff_scale); +#if defined(LUMA_raw) + return RK(pdiff_sq); +#elif defined(CHROMA_raw) + return vec2(RK(pdiff_sq.x), RK(pdiff_sq.y)); +#else + return vec3(RK(pdiff_sq.x), RK(pdiff_sq.y), RK(pdiff_sq.z)); +#endif + //return exp(-pdiff_sq * pdiff_scale); + + // weight function from the NLM paper, it's not very good + //return exp(-max(pdiff_sq - 2*S*S, 0.0) * pdiff_scale); +} + +val patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 min_rot = vec4(p_area); + val min_rot = val(p_area); FOR_ROTATION FOR_REFLECTION { - vec4 pdiff_sq = vec4(0); + val pdiff_sq = val(0); FOR_PATCH(p) { vec3 transformed_p = 
vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = load2(p + r2) - load2((transformed_p + r) * SF); + val diff_sq = load2(p + r2) - load2((transformed_p + r) * SF); diff_sq *= diff_sq; -#if PST && P >= PST - float pdist = length(p.xy*PSD)*PSS; - pdist = exp(-(pdist*pdist)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); -#endif + diff_sq = 1 - (1 - diff_sq) * spatial_p(p.xy); pdiff_sq += diff_sq; } min_rot = min(min_rot, pdiff_sq); @@ -666,14 +799,15 @@ vec4 patch_comparison(vec3 r, vec3 r2) #define NO_GATHER (PD == 0 && NG == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#if (defined(LUMA_gather) || D1W) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if (defined(LUMA_gather) || D1W) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather // XXX extend to support arbitrary sizes (probably requires code generation) // XXX extend to support 3x3 square +// XXX support PSS const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; const ivec2 offsets_sf[4] = { ivec2(0,-1) * SF, ivec2(-1,0) * SF, ivec2(0,1) * SF, ivec2(1,0) * SF }; vec4 poi_patch = gather_offs(0, offsets); -vec4 patch_comparison_gather(vec3 r, vec3 r2) +float patch_comparison_gather(vec3 r, vec3 r2) { float min_rot = p_area - 1; vec4 transformer = gather_offs(r, offsets_sf); @@ -697,13 +831,12 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) } float center_diff_sq = poi2.x - load2(r).x; center_diff_sq *= center_diff_sq; - return vec4(min_rot + center_diff_sq, 0, 0, 0) * p_scale; + return (min_rot + center_diff_sq) * p_scale; } -#elif (defined(LUMA_gather) || D1W) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER +#elif (defined(LUMA_gather) || D1W) && PS == 6 && RI == 0 && RFI == 0 && NO_GATHER // tiled even square patch_comparison_gather // XXX extend to support odd square? 
-// XXX rotations/reflections appear to be subtly broken -vec4 patch_comparison_gather(vec3 r, vec3 r2) +float patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; float min_rot = p_area; @@ -712,40 +845,17 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) * w z * x y */ - FOR_ROTATION FOR_REFLECTION { - float pdiff_sq = 0; - for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { - vec4 poi_patch = gather(tile + r2.xy); - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - -#if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees -#endif -#if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } -#endif - - vec4 diff_sq = (poi_patch - transformer) * (poi_patch - transformer); -#if PST && P >= PST - // XXX refactor to avoid pow (should probably break off into a function) - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq += dot(diff_sq, vec4(1)); - } - min_rot = min(min_rot, pdiff_sq); + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { + vec4 diff_sq = gather(tile + r.xy) - gather(tile + r2.xy); + diff_sq *= diff_sq; + diff_sq = 1 - (1 - diff_sq) * vec4(spatial_p(tile+vec2(0,1)), spatial_p(tile+vec2(1,1)), + spatial_p(tile+vec2(1,0)), spatial_p(tile+vec2(0,0))); + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); - return vec4(min_rot, 0, 0, 0) * p_scale; + return min_rot * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -753,9 +863,9 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) vec4 hook() { - vec4 total_weight = vec4(0); - vec4 sum = 
vec4(0); - vec4 result = vec4(0); + val total_weight = val(0); + val sum = val(0); + val result = val(0); vec3 r = vec3(0); vec3 p = vec3(0); @@ -769,41 +879,38 @@ vec4 hook() float me_weight = 0; #endif -#if WD == 2 || M == 3 // weight discard, weighted median intensities +#if WD == 2 // weight discard int r_index = 0; - vec4 all_weights[r_area]; - vec4 all_pixels[r_area]; + val_packed all_weights[r_area]; + val_packed all_pixels[r_area]; #elif WD == 1 // weight discard - vec4 no_weights = vec4(0); - vec4 discard_total_weight = vec4(0); - vec4 discard_sum = vec4(0); -#endif - -#if M == 1 // Euclidean medians - vec4 minsum = vec4(0); + val no_weights = val(0); + val discard_total_weight = val(0); + val discard_sum = val(0); #endif FOR_FRAME(r) { // XXX ME is always a frame behind, should have to option to re-research after applying ME (could do it an arbitrary number of times per frame if desired) #if T && ME == 1 // temporal & motion estimation max weight if (r.z > 0) { - me += me_tmp; + me += me_tmp * MEF; me_tmp = vec3(0); maxweight = 0; } #elif T && ME == 2 // temporal & motion estimation weighted average if (r.z > 0) { - me += round(me_sum / me_weight); + me += round(me_sum / me_weight * MEF); me_sum = vec3(0); me_weight = 0; } #endif - FOR_RESEARCH(r) { - // main NLM logic - const float h = S*0.013; - const float pdiff_scale = 1.0/(h*h); - vec4 pdiff_sq = (r.z == 0) ? patch_comparison_gather(r+me, vec3(0)) : patch_comparison(r+me, vec3(0)); - vec4 weight = exp(-pdiff_sq * pdiff_scale); + FOR_RESEARCH(r) { // main NLM logic +#if SKIP_PATCH + val weight = val(1); +#else + val pdiff_sq = (r.z == 0) ? 
val(patch_comparison_gather(r+me, vec3(0))) : patch_comparison(r+me, vec3(0)); + val weight = range(pdiff_sq); +#endif #if T && ME == 1 // temporal & motion estimation max weight me_tmp = vec3(r.xy,0) * step(maxweight, weight.x) + me_tmp * (1 - step(maxweight, weight.x)); @@ -814,18 +921,18 @@ vec4 hook() #endif #if D1W - weight = vec4(weight.x); + weight = val(weight.x); #endif - weight *= exp(-(length(r*SD)*SS * length(r*SD)*SS)); // spatial kernel + weight *= spatial_r(r); -#if WD == 2 || M == 3 // weight discard, weighted median intensity - all_weights[r_index] = weight; - all_pixels[r_index] = load(r+me); +#if WD == 2 // weight discard + all_weights[r_index] = val_pack(weight); + all_pixels[r_index] = val_pack(load(r+me)); r_index++; #elif WD == 1 // weight discard - vec4 wd_scale = 1.0/max(no_weights, 1); - vec4 keeps = step(total_weight*wd_scale * WDT*exp(-wd_scale*WDP), weight); + val wd_scale = 1.0/max(no_weights, 1); + val keeps = step(total_weight*wd_scale * WDT*exp(-wd_scale*WDP), weight); discard_sum += load(r+me) * weight * (1 - keeps); discard_total_weight += weight * (1 - keeps); no_weights += keeps; @@ -833,45 +940,25 @@ vec4 hook() sum += load(r+me) * weight; total_weight += weight; - -#if M == 1 // Euclidean median - // Based on: https://arxiv.org/abs/1207.3056 - // XXX might not work with ME - vec3 r2; - vec4 wpdist_sum = vec4(0); - FOR_FRAME(r2) FOR_RESEARCH(r2) { - vec4 pdist = (r.z + r2.z) == 0 ? patch_comparison_gather(r+me, r2+me) : patch_comparison(r+me, r2+me); - wpdist_sum += sqrt(pdist) * (1-weight); - } - - vec4 newmin = step(wpdist_sum, minsum); // wpdist_sum <= minsum - newmin *= 1 - step(wpdist_sum, vec4(0)); // && wpdist_sum > 0 - newmin += step(minsum, vec4(0)); // || minsum <= 0 - newmin = min(newmin, 1); - - minsum = (newmin * wpdist_sum) + ((1-newmin) * minsum); - result = (newmin * load(r+me)) + ((1-newmin) * result); -#endif } // FOR_RESEARCH } // FOR_FRAME - // XXX optionally put the denoised pixel into the frame buffer? 
-#if T // temporal -#endif - - vec4 avg_weight = total_weight * r_scale; - vec4 old_avg_weight = avg_weight; + val avg_weight = total_weight * r_scale; + val old_avg_weight = avg_weight; #if WD == 2 // true average - total_weight = vec4(0); - sum = vec4(0); - vec4 no_weights = vec4(0); + total_weight = val(0); + sum = val(0); + val no_weights = val(0); for (int i = 0; i < r_area; i++) { - vec4 keeps = step(avg_weight*WDT, all_weights[i]); - all_weights[i] *= keeps; - sum += all_pixels[i] * all_weights[i]; - total_weight += all_weights[i]; + val w = val_unpack(all_weights[i]); + val px = val_unpack(all_pixels[i]); + val keeps = step(avg_weight*WDT, w); + + w *= keeps; + sum += px * w; + total_weight += w; no_weights += keeps; } #elif WD == 1 // moving cumulative average @@ -882,29 +969,23 @@ vec4 hook() avg_weight = total_weight / no_weights; #endif - total_weight += SW; - sum += poi * SW; + total_weight += SW * spatial_r(vec3(0)); + sum += poi * SW * spatial_r(vec3(0)); -#if M == 3 // weighted median intensity - const float hr_area = r_area/2.0; - vec4 is_median, gt, lt, gte, lte, neq; +#if V == 3 // weight map + result = val(avg_weight); +#else // mean + result = val(sum / total_weight); +#endif - for (int i = 0; i < r_area; i++) { - gt = lt = vec4(0); - for (int j = 0; j < r_area; j++) { - gte = step(all_pixels[i]*all_weights[i], all_pixels[j]*all_weights[j]); - lte = step(all_pixels[j]*all_weights[j], all_pixels[i]*all_weights[i]); - neq = 1 - gte * lte; - gt += gte * neq; - lt += lte * neq; - } - is_median = step(gt, vec4(hr_area)) * step(lt, vec4(hr_area)); - result += step(result, vec4(0)) * is_median * all_pixels[i]; - } -#elif M == 2 // weight map - result = avg_weight; -#elif M == 0 // mean - result = sum / total_weight; + // store frames for temporal +#if T > 1 + +#endif +#if T && TRF + imageStore(PREV1, ivec2(HOOKED_pos*imageSize(PREV1)), unval(result)); +#elif T + imageStore(PREV1, ivec2(HOOKED_pos*imageSize(PREV1)), unval(poi2)); #endif #if ASW == 0 // 
pre-WD weights @@ -914,22 +995,20 @@ vec4 hook() #endif #if ASK == 0 - vec4 sharpening_strength = pow(AS_weight, vec4(ASP)); + val sharpening_strength = pow(AS_weight, val(ASP)); #elif ASK == 1 -#define sigmoid(x) (tanh(x * 2*M_PI - M_PI)*0.5+0.5) - vec4 sharpening_strength = mix(pow(sigmoid(AS_weight), vec4(ASP)), - AS_weight, ASC); - // just in case ASC < 0 (will sharpen but it's janky XXX) - sharpening_strength = clamp(sharpening_strength, 0.0, 1.0); + val sharpening_strength = mix( + pow(smoothstep(0.0, 1.0, AS_weight), val(ASP)), + AS_weight, ASC); + // XXX normalize the result to account for a negative ASC? #elif ASK == 2 - vec4 sharpening_strength = vec4(ASP); + val sharpening_strength = val(ASP); #endif - // XXX maybe allow for alternative blurs? e.g., replace result w/ load2? #if AS == 1 // sharpen+denoise - vec4 sharpened = result + (poi - result) * ASF; + val sharpened = result + (poi - result) * ASF; #elif AS == 2 // sharpen only - vec4 sharpened = poi + (poi - result) * ASF; + val sharpened = poi + (poi - result) * ASF; #endif #if EP // extremes preserve @@ -945,20 +1024,20 @@ vec4 hook() result = mix(sharpened, poi, sharpening_strength); #endif -#if M == 4 // edge map +#if V == 4 // edge map result = sharpening_strength; #endif -#if (M == 2 || M == 4) && defined(CHROMA_raw) // drop chroma for weight maps - result = vec4(0.5); +#if (V == 3 || V == 4) && defined(CHROMA_raw) // drop chroma for these visualizations + return vec4(0.5); #endif -#if DV == 1 - result = clamp(abs(poi - result) * S, 0.0, 1.0); -#elif DV == 2 +#if V == 1 + result = clamp(pow(abs(poi - result), val(0.25)), 0.0, 1.0); +#elif V == 2 result = (poi - result) * 0.5 + 0.5; #endif - return mix(poi, result, BF); + return unval(mix(poi, result, BF)); } diff --git a/portable_config/shaders/nlmeans_lq.glsl b/portable_config/shaders/nlmeans_lq.glsl index 210708b5..80eaf745 100644 --- a/portable_config/shaders/nlmeans_lq.glsl +++ b/portable_config/shaders/nlmeans_lq.glsl @@ -19,7 +19,7 @@ * 
along with this program. If not, see . */ -// Profile description: Faster, but lower quality. +// Description: nlmeans_lq.glsl: Faster, but lower quality. /* The recommended usage of this shader and its variant profiles is to add them * to input.conf and then dispatch the appropriate shader via a keybind during @@ -48,8 +48,8 @@ * of noise. * * The denoiser will not work properly if the content has been upscaled - * beforehand, whether it was done by you or someone down the line. Consider - * issuing a command to downscale in the mpv console, like so: + * beforehand (whether it was done by you or not). In such cases, consider + * issuing a command to downscale in the mpv console (backtick ` key): * * vf toggle scale=-2:720 * @@ -65,12 +65,13 @@ * may be different for your system. * * If your GPU doesn't support textureGather, or if you are on a version of mpv - * prior to 0.35.0, then consider setting RI/RFI to 0, or try the LQ and VLQ - * profiles. + * prior to 0.35.0, then consider setting RI/RFI to 0, or try the LQ profile * - * textureGather is LUMA only and limited to the following configurations: + * If you plan on tinkering with NLM's settings, read below: * - * - PS={3,7}:P=3:PST=0:RI={0,1,3}:RFI={0,1,2}:M!=1 + * textureGather only applies to luma and is limited to these configurations: + * + * - PS={3,7}:P=3:PST=0:RI={0,1,3}:RFI={0,1,2} * - Default, very fast, rotations and reflections should be free * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. 
@@ -83,15 +84,16 @@ * * Options which always disable textureGather: * - PD + * - NG */ //!HOOK LUMA //!HOOK CHROMA -//!DESC Non-local means (downscale) //!BIND HOOKED -//!SAVE PRERF_LUMA //!WIDTH HOOKED.w 1.25 / //!HEIGHT HOOKED.h 1.25 / +//!DESC Non-local means (PRERF) +//!SAVE PRERF_LUMA vec4 hook() { @@ -100,11 +102,11 @@ vec4 hook() //!HOOK LUMA //!HOOK CHROMA -//!DESC Non-local means (unscale) //!BIND PRERF_LUMA -//!SAVE RF_LUMA //!WIDTH HOOKED.w //!HEIGHT HOOKED.h +//!DESC Non-local means (RF) +//!SAVE RF_LUMA vec4 hook() { @@ -113,66 +115,48 @@ vec4 hook() //!HOOK LUMA //!HOOK CHROMA -//!DESC Non-local means (downscale) -//!WIDTH LUMA.w 3 / -//!HEIGHT LUMA.h 3 / -//!BIND LUMA -//!SAVE EP +//!BIND RF_LUMA +//!WIDTH RF_LUMA.w +//!HEIGHT RF_LUMA.h +//!DESC Non-local means (RF, share) +//!SAVE RF vec4 hook() { - return LUMA_texOff(0); + return RF_LUMA_texOff(0); } //!HOOK LUMA //!HOOK CHROMA -//!DESC Non-local means (share) -//!BIND RF_LUMA -//!SAVE RF +//!BIND LUMA +//!WIDTH LUMA.w 3 / +//!HEIGHT LUMA.h 3 / +//!DESC Non-local means (EP) +//!SAVE EP vec4 hook() { - return RF_LUMA_texOff(0); + return LUMA_texOff(0); } //!HOOK LUMA //!HOOK CHROMA //!BIND HOOKED //!BIND RF_LUMA -//!BIND EP //!BIND RF +//!BIND EP //!DESC Non-local means (nlmeans_lq.glsl) -/* User variables - * - * It is usually preferable to denoise chroma and luma differently, so the user - * variables for luma and chroma are split. - */ +// User variables -/* S = denoising factor - * P = patch size - * R = research size - * - * The denoising factor controls the level of blur, higher is blurrier. - * - * Patch size should usually be an odd number greater than or equal to 3. - * Higher values are slower and not always better. - * - * Research size usually be an odd number greater than or equal to 3. Higher - * values are usually better, but slower and offer diminishing returns. - * - * Even-numbered patch/research sizes will sample between pixels unless PS=6. 
- * It's not known whether this is ever useful behavior or not. This is - * incompatible with textureGather optimizations, so NG=1 to disable them. - */ +// It is generally preferable to denoise luma and chroma differently, so the +// user variables for luma and chroma are split. + +// Denoising factor (level of blur, higher means more blur) #ifdef LUMA_raw #define S 1.25 -#define P 3 -#define R 3 #else #define S 5.0 -#define P 3 -#define R 5 #endif /* Adaptive sharpening @@ -180,11 +164,16 @@ vec4 hook() * Uses the blur incurred by denoising to perform an unsharp mask, and uses the * weight map to restrict the sharpening to edges. * - * Use M=4 to get a good look at which areas are/aren't sharpened. + * If you just want to increase/decrease sharpness then you want to change ASF. + * + * Use V=4 to visualize which areas are sharpened (black means sharpen). * - * AS: 2 for sharpening, 1 for sharpening+denoising, 0 to disable - * ASF: Sharpening factor, higher numbers make a sharper underlying image - * ASP: Weight power, higher numbers use more of the sharp image + * AS: + * - 0 to disable + * - 1 to sharpen+denoise + * - 2 to sharpen only + * ASF: Higher numbers make a sharper image + * ASP: Higher numbers use more of the sharp image * ASW: * - 0 to use pre-WD weights * - 1 to use post-WD weights (ASP should be ~2x to compensate) @@ -196,15 +185,15 @@ vec4 hook() */ #ifdef LUMA_raw #define AS 0 -#define ASF 2.0 -#define ASP 4.0 +#define ASF 3.0 +#define ASP 1.0 #define ASW 0 #define ASK 1 #define ASC 0.0 #else #define AS 0 -#define ASF 2.0 -#define ASP 4.0 +#define ASF 3.0 +#define ASP 1.0 #define ASW 0 #define ASK 1 #define ASC 0.0 @@ -212,12 +201,10 @@ vec4 hook() /* Starting weight * - * Lower numbers give less weight to the pixel-of-interest, which may help - * handle higher noise levels, ringing, and may be useful for other things too? + * Also known as the center weight. This represents the weight of the + * pixel-of-interest. 
Lower numbers may help handle heavy noise & ringing. * - * EPSILON should be used instead of zero to avoid divide-by-zero errors. The - * avg_weight/old_avg_weight variables may be used to make SW adapt to the - * local noise level, e.g., SW=max(avg_weight, EPSILON) + * EPSILON should be used instead of zero to avoid divide-by-zero errors. */ #ifdef LUMA_raw #define SW 1.0 @@ -232,7 +219,7 @@ vec4 hook() * result, especially around edges. * * WD: - * - 2: True average. Very good quality, but slower and uses more memory. + * - 2: True average. Better quality, but slower and requires GLSL 4.0 or later * - 1: Moving cumulative average. Inaccurate, tends to blur directionally. * - 0: Disable * @@ -251,12 +238,14 @@ vec4 hook() /* Extremes preserve * - * Reduces denoising around very bright/dark areas. The downscaling factor of - * EP (located near the top of this shader) controls the area sampled for - * luminance (higher numbers consider more area). + * Reduces denoising around very bright/dark areas. + * + * The downscaling factor of the EP shader stage affects what is considered a + * bright/dark area. The default of 3 should be fine, it's not recommended to + * change this. * * This is incompatible with RGB. If you have RGB hooks enabled then you will - * have to delete the EP shader stage or specify EP=0 through nlmeans_cfg. + * have to delete the EP shader stage or specify EP=0 through shader_cfg. * * EP: 1 to enable, 0 to disable * DP: EP strength on dark patches, 0 to fully denoise @@ -278,25 +267,26 @@ vec4 hook() /* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ /* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ -/* Robust filtering +/* Patch & research sizes * - * This setting is dependent on code generation from nlmeans_cfg, so this - * setting can only be enabled via nlmeans_cfg. + * Patch size should be an odd number greater than or equal to 3. Higher values + * are slower and not always better. 
* - * Compares the pixel-of-interest against a guide, which could be a downscaled - * image or the output of another shader such as guided.glsl + * Research size should be an odd number greater than or equal to 3. Higher values are + * generally better, but slower, blurrier, and give diminishing returns. */ #ifdef LUMA_raw -#define RF 1 +#define P 3 +#define R 3 #else -#define RF 1 +#define P 3 +#define R 5 #endif -/* Search shape +/* Patch and research shapes * - * Determines the shape of patches and research zones. Different shapes have - * different speed and quality characteristics. Every shape (besides square) is - * smaller than square. + * Different shapes have different speed and quality characteristics. Every + * shape (besides square) is smaller than square. * * PS applies applies to patches, RS applies to research zones. * @@ -319,11 +309,22 @@ vec4 hook() #define PS 3 #endif +/* Robust filtering + * + * This setting is dependent on code generation from shader_cfg, so this + * setting can only be enabled via shader_cfg. + * + * Compares the pixel-of-interest against a guide, which could be a downscaled + * image or the output of another shader + */ +#define RF_LUMA 1 +#define RF 1 + /* Rotational/reflectional invariance * - * Number of rotations/reflections to try for each patch comparison. Slow, but - * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. The most similar rotation/reflection will be used. + * Number of rotations/reflections to try for each patch comparison. Can be + * slow, but improves feature preservation. More rotations/reflections give + * diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc.
@@ -340,29 +341,39 @@ vec4 hook() #endif /* Temporal denoising + * + * This setting is dependent on code generation from shader_cfg, so this + * setting can only be enabled via shader_cfg. * * Caveats: - * - Slower, each frame needs to be researched - * - Requires vo=gpu-next and nlmeans_temporal.glsl + * - Slower: + * - Each frame needs to be researched (more samples & more math) + * - Gather optimizations only apply to the current frame + * - Requires vo=gpu-next * - Luma-only (this is a bug) * - Buggy * - * Gather samples across multiple frames. May cause motion blur and may - * struggle more with noise that persists across multiple frames (e.g., from - * compression or duplicate frames), but can work very well on high quality - * video. + * May cause motion blur and may struggle more with noise that persists across + * multiple frames (e.g., from compression or duplicate frames), but can work + * very well on high quality video. * * Motion estimation (ME) should improve quality without impacting speed. * * T: number of frames used * ME: motion estimation, 0 for none, 1 for max weight, 2 for weighted avg + * MEF: estimate factor, compensates for ME being one frame behind + * TRF: compare against the denoised frames */ #ifdef LUMA_raw #define T 0 #define ME 1 +#define MEF 2 +#define TRF 0 #else #define T 0 #define ME 0 +#define MEF 2 +#define TRF 0 #endif /* Spatial kernel @@ -374,69 +385,79 @@ vec4 hook() * closer/further, for instance SD=(1,1,0.5) would make the temporal axis * appear closer and increase blur between frames. * - * The intra-patch variants do not yet have well-understood effects. They are - * intended to make large patch sizes more useful. Likely slower. + * The intra-patch variants are supposed to help with larger patch sizes. 
* - * SS: spatial denoising factor + * SST: enables spatial kernel if R>=PST, 0 fully disables + * SS: spatial sigma * SD: spatial distortion (X, Y, time) - * PSS: intra-patch spatial denoising factor + * PSS: intra-patch spatial sigma * PST: enables intra-patch spatial kernel if P>=PST, 0 fully disables * PSD: intra-patch spatial distortion (X, Y) */ #ifdef LUMA_raw +#define SST 1 #define SS 0.25 -#define SD vec3(1,1,1.5) +#define SD vec3(1,1,1) #define PST 0 #define PSS 0.0 #define PSD vec2(1,1) #else +#define SST 1 #define SS 0.25 -#define SD vec3(1,1,1.5) +#define SD vec3(1,1,1) #define PST 0 #define PSS 0.0 #define PSD vec2(1,1) #endif -// Scaling factor (should match WIDTH/HEIGHT) +/* Kernels + * + * SK: spatial kernel + * RK: range kernel (takes patch differences) + * PSK: intra-patch spatial kernel + * + * List of available kernels: + * + * bicubic + * cos + * gaussian + * lanczos + * quadratic + * sinc + * sphinx + */ #ifdef LUMA_raw -#define SF 1 +#define SK gaussian +#define RK gaussian +#define PSK gaussian #else -#define SF 1 +#define SK gaussian +#define RK gaussian +#define PSK gaussian #endif -/* Estimator - * - * 0: means - * 1: Euclidean medians (extremely slow, may be good for heavy noise) - * 2: weight map (not a denoiser, maybe useful for generating image masks) - * 3: weighted median intensity (slow, may be good for heavy noise) - * 4: edge map (based on the relevant AS settings) - */ +// Scaling factor (should match WIDTH/HEIGHT) #ifdef LUMA_raw -#define M 0 +#define SF 1 #else -#define M 0 +#define SF 1 #endif -/* Difference visualization - * - * Visualizes the difference between input/output image +/* Visualization * * 0: off - * 1: absolute difference scaled by S - * 2: difference centered on 0.5 + * 1: absolute difference between input/output to the power of 0.25 + * 2: difference between input/output centered on 0.5 + * 3: avg_weight + * 4: edge map (based on the relevant AS settings) */ #ifdef LUMA_raw -#define DV 0 +#define V 0 #else 
-#define DV 0 +#define V 0 #endif -/* Blur factor - * - * 0 to 1, only useful for alternative estimators. You're probably looking for - * "S" (denoising factor), go back to the top of the shader! - */ +// Blur factor (0.0 returns the input image, 1.0 returns the output image) #ifdef LUMA_raw #define BF 1.0 #else @@ -457,17 +478,57 @@ vec4 hook() #define PD 0 #endif -// Duplicate 1st weight (for LGC) +// Duplicate 1st weight (for luma-guided-chroma) #ifdef LUMA_raw #define D1W 0 #else #define D1W 0 #endif -/* Shader code */ +// Skip patch comparison +#ifdef LUMA_raw +#define SKIP_PATCH 0 +#else +#define SKIP_PATCH 0 +#endif + +// Shader code #define EPSILON 0.00000000001 #define M_PI 3.14159265358979323846 +#define POW2(x) ((x)*(x)) +#define POW3(x) ((x)*(x)*(x)) +#define bicubic(x) ((1.0/6.0) * (POW3((x)+2) - 4 * POW3((x)+1) + 6 * POW3(x) - 4 * POW3(max((x)-1, 0)))) +#define gaussian(x) exp(-1 * POW2(x)) +#define lanczos(x) POW2(sinc(x)) +#define quadratic(x) ((x) < 0.5 ? 0.75 - POW2(x) : 0.5 * POW2((x) - 1.5)) +#define sinc(x) ((x) < 1e-8 ? 1.0 : sin((x)*M_PI) / ((x)*M_PI)) +#define sphinx(x) ((x) < 1e-8 ? 
1.0 : 3.0 * (sin((x)*M_PI) - (x)*M_PI * cos((x)*M_PI)) / POW3((x)*M_PI)) + +// XXX could maybe be better optimized on LGC +// XXX return original alpha component instead of 1.0 +#if defined(LUMA_raw) +#define val float +#define val_swizz(v) (v.x) +#define unval(v) vec4(v.x, 0, 0, 1.0) +#define val_packed val +#define val_pack(v) (v) +#define val_unpack(v) (v) +#elif defined(CHROMA_raw) +#define val vec2 +#define val_swizz(v) (v.xy) +#define unval(v) vec4(v.x, v.y, 0, 1.0) +#define val_packed uint +#define val_pack(v) packUnorm2x16(v) +#define val_unpack(v) unpackUnorm2x16(v) +#else +#define val vec3 +#define val_swizz(v) (v.xyz) +#define unval(v) vec4(v.x, v.y, v.z, 1.0) +#define val_packed val +#define val_pack(v) (v) +#define val_unpack(v) (v) +#endif #if PS == 6 const int hp = P/2; @@ -482,39 +543,96 @@ const float hr = int(R/2) - 0.5*(1-(R%2)); // sample between pixels for even res #endif // donut increment, increments without landing on (0,0,0) -// much faster than a "continue" statement +// much faster than a continue statement #define DINCR(z,c) (z.c++,(z.c += int(z == vec3(0)))) -// search shapes and their corresponding areas -#define S_1X1(z) for (z = vec3(0); z.x <= 0; z.x++) +// patch/research shapes +// each shape is depicted in a comment, where Z=5 (Z corresponds to P or R) +// dots (.) represent samples (pixels) and X represents the pixel-of-interest + +// Z ..... +// Z ..... +// Z ..X.. +// Z ..... +// Z ..... +#define S_SQUARE(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz; z.y <= hz; incr) + +// (in this instance Z=4) +// Z .... +// Z .... +// Z ..X. +// Z .... +#define S_SQUARE_EVEN(z,hz,incr) for (z.x = -hz; z.x < hz; z.x++) for (z.y = -hz; z.y < hz; incr) +// Z-4 . +// Z-2 ... +// Z ..X.. #define S_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz); incr) + +// Z-4 . +// Z-2 ... 
+// hz+1 ..X #define S_TRUNC_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz)*int(z.y!=0); incr) #define S_TRIANGLE_A(hz,Z) int(hz*hz+Z) +// Z-4 . +// Z-2 ... +// Z ..X.. +// Z-2 ... +// Z-4 . #define S_DIAMOND(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -abs(abs(z.x) - hz); z.y <= abs(abs(z.x) - hz); incr) #define S_DIAMOND_A(hz,Z) int(hz*hz*2+Z) -#define S_VERTICAL(z,hz,incr) for (z.x = 0; z.x <= 0; z.x++) for (z.y = -hz; z.y <= hz; incr) +// +// Z ..X.. +// #define S_HORIZONTAL(z,hz,incr) for (z.x = -hz; z.x <= hz; incr) for (z.y = 0; z.y <= 0; z.y++) +// 90 degree rotation of S_HORIZONTAL +#define S_VERTICAL(z,hz,incr) for (z.x = 0; z.x <= 0; z.x++) for (z.y = -hz; z.y <= hz; incr) + +// 1 . +// 1 . +// Z ..X.. +// 1 . +// 1 . #define S_PLUS(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz * int(z.x == 0); z.y <= hz * int(z.x == 0); incr) #define S_PLUS_A(hz,Z) (Z*2 - 1) -#define S_SQUARE(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz; z.y <= hz; incr) -#define S_SQUARE_EVEN(z,hz,incr) for (z.x = -hz; z.x < hz; z.x++) for (z.y = -hz; z.y < hz; incr) +// XXX implement S_PLUS w/ an X overlayed: +// 3 . . . +// 3 ... +// Z ..X.. +// 3 ... +// 3 . . . + +// XXX implement an X shape: +// 2 . . +// 2 . . +// 1 X +// 2 . . +// 2 . . 
+ +// 1x1 square +#define S_1X1(z) for (z = vec3(0); z.x <= 0; z.x++) #define T1 (T+1) #define FOR_FRAME(r) for (r.z = 0; r.z < T1; r.z++) +#ifdef LUMA_raw +#define RF_ RF_LUMA +#else +#define RF_ RF +#endif + // Skip comparing the pixel-of-interest against itself, unless RF is enabled -#if RF +#if RF_ #define RINCR(z,c) (z.c++) #else #define RINCR DINCR #endif -#define R_AREA(a) (a * T1 + RF-1) +#define R_AREA(a) (a * T1 + RF_-1) // research shapes // XXX would be nice to have the option of temporally-varying research sizes @@ -603,44 +721,44 @@ const int p_area = P_AREA(P*P); const float r_scale = 1.0/r_area; const float p_scale = 1.0/p_area; -#define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define sample(tex, pos, size, pt, off) tex(pos + pt * (vec2(off) + 0.5 - fract(pos*size))) +#define load_(off) sample(HOOKED_tex, HOOKED_pos, HOOKED_size, HOOKED_pt, off) -#if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#if RF_ && defined(LUMA_raw) +#define load2_(off) sample(RF_LUMA_tex, RF_LUMA_pos, RF_LUMA_size, RF_LUMA_pt, off) #define gather_offs(off, off_arr) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, off_arr))) #define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off) * RF_LUMA_pt, 0) -#elif RF && D1W -#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) +#elif RF_ && D1W +#define load2_(off) sample(RF_tex, RF_pos, RF_size, RF_pt, off) #define gather_offs(off, off_arr) (RF_mul * vec4(textureGatherOffsets(RF_raw, RF_pos + vec2(off) * RF_pt, off_arr))) #define gather(off) RF_gather(RF_pos + (off) * RF_pt, 0) -#elif RF -#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) +#elif RF_ +#define load2_(off) sample(RF_tex, RF_pos, RF_size, RF_pt, off) #else -#define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) load_(off) #define gather_offs(off, off_arr) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + 
vec2(off) * HOOKED_pt, off_arr))) #define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T -vec4 load(vec3 off) +val load(vec3 off) { - switch (int(off.z)) { - case 0: return load_(off); + switch (min(int(off.z), frame)) { + case 0: return val_swizz(load_(off)); + } } -vec4 load2(vec3 off) +val load2(vec3 off) { - switch (int(off.z)) { - case 0: return load2_(off); - } + return off.z == 0 ? val_swizz(load2_(off)) : load(off); } #else -#define load(off) load_(off) -#define load2(off) load2_(off) +#define load(off) val_swizz(load_(off)) +#define load2(off) val_swizz(load2_(off)) #endif -vec4 poi = load(vec3(0)); // pixel-of-interest -vec4 poi2 = load2(vec3(0)); // guide pixel-of-interest +val poi = load(vec3(0)); // pixel-of-interest +val poi2 = load2(vec3(0)); // guide pixel-of-interest #if RI // rotation vec2 rot(vec2 p, float d) @@ -667,22 +785,52 @@ vec2 ref(vec2 p, int d) #define ref(p, d) (p) #endif -vec4 patch_comparison(vec3 r, vec3 r2) +#if SST && R >= SST +float spatial_r(vec3 v) +{ + v.xy += 0.5 - fract(HOOKED_pos*HOOKED_size); + return SK(length(v*SD)*SS); +} +#else +#define spatial_r(v) (1) +#endif + +#if PST && P >= PST +#define spatial_p(v) PSK(length(v*PSD)*PSS) +#else +#define spatial_p(v) (1) +#endif + +val range(val pdiff_sq) +{ + const float h = S*0.013; + const float pdiff_scale = 1.0/(h*h); + pdiff_sq = sqrt(pdiff_sq * pdiff_scale); +#if defined(LUMA_raw) + return RK(pdiff_sq); +#elif defined(CHROMA_raw) + return vec2(RK(pdiff_sq.x), RK(pdiff_sq.y)); +#else + return vec3(RK(pdiff_sq.x), RK(pdiff_sq.y), RK(pdiff_sq.z)); +#endif + //return exp(-pdiff_sq * pdiff_scale); + + // weight function from the NLM paper, it's not very good + //return exp(-max(pdiff_sq - 2*S*S, 0.0) * pdiff_scale); +} + +val patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 min_rot = vec4(p_area); + val min_rot = val(p_area); FOR_ROTATION FOR_REFLECTION { - vec4 pdiff_sq = vec4(0); + val pdiff_sq = val(0); FOR_PATCH(p) { vec3 transformed_p = 
vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = load2(p + r2) - load2((transformed_p + r) * SF); + val diff_sq = load2(p + r2) - load2((transformed_p + r) * SF); diff_sq *= diff_sq; -#if PST && P >= PST - float pdist = length(p.xy*PSD)*PSS; - pdist = exp(-(pdist*pdist)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); -#endif + diff_sq = 1 - (1 - diff_sq) * spatial_p(p.xy); pdiff_sq += diff_sq; } min_rot = min(min_rot, pdiff_sq); @@ -694,14 +842,15 @@ vec4 patch_comparison(vec3 r, vec3 r2) #define NO_GATHER (PD == 0 && NG == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#if (defined(LUMA_gather) || D1W) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if (defined(LUMA_gather) || D1W) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather // XXX extend to support arbitrary sizes (probably requires code generation) // XXX extend to support 3x3 square +// XXX support PSS const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; const ivec2 offsets_sf[4] = { ivec2(0,-1) * SF, ivec2(-1,0) * SF, ivec2(0,1) * SF, ivec2(1,0) * SF }; vec4 poi_patch = gather_offs(0, offsets); -vec4 patch_comparison_gather(vec3 r, vec3 r2) +float patch_comparison_gather(vec3 r, vec3 r2) { float min_rot = p_area - 1; vec4 transformer = gather_offs(r, offsets_sf); @@ -725,13 +874,12 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) } float center_diff_sq = poi2.x - load2(r).x; center_diff_sq *= center_diff_sq; - return vec4(min_rot + center_diff_sq, 0, 0, 0) * p_scale; + return (min_rot + center_diff_sq) * p_scale; } -#elif (defined(LUMA_gather) || D1W) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER +#elif (defined(LUMA_gather) || D1W) && PS == 6 && RI == 0 && RFI == 0 && NO_GATHER // tiled even square patch_comparison_gather // XXX extend to support odd square? 
-// XXX rotations/reflections appear to be subtly broken -vec4 patch_comparison_gather(vec3 r, vec3 r2) +float patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; float min_rot = p_area; @@ -740,40 +888,17 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) * w z * x y */ - FOR_ROTATION FOR_REFLECTION { - float pdiff_sq = 0; - for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { - vec4 poi_patch = gather(tile + r2.xy); - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - -#if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees -#endif -#if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } -#endif - - vec4 diff_sq = (poi_patch - transformer) * (poi_patch - transformer); -#if PST && P >= PST - // XXX refactor to avoid pow (should probably break off into a function) - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq += dot(diff_sq, vec4(1)); - } - min_rot = min(min_rot, pdiff_sq); + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { + vec4 diff_sq = gather(tile + r.xy) - gather(tile + r2.xy); + diff_sq *= diff_sq; + diff_sq = 1 - (1 - diff_sq) * vec4(spatial_p(tile+vec2(0,1)), spatial_p(tile+vec2(1,1)), + spatial_p(tile+vec2(1,0)), spatial_p(tile+vec2(0,0))); + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); - return vec4(min_rot, 0, 0, 0) * p_scale; + return min_rot * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -781,9 +906,9 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) vec4 hook() { - vec4 total_weight = vec4(0); - vec4 sum = 
vec4(0); - vec4 result = vec4(0); + val total_weight = val(0); + val sum = val(0); + val result = val(0); vec3 r = vec3(0); vec3 p = vec3(0); @@ -797,41 +922,38 @@ vec4 hook() float me_weight = 0; #endif -#if WD == 2 || M == 3 // weight discard, weighted median intensities +#if WD == 2 // weight discard int r_index = 0; - vec4 all_weights[r_area]; - vec4 all_pixels[r_area]; + val_packed all_weights[r_area]; + val_packed all_pixels[r_area]; #elif WD == 1 // weight discard - vec4 no_weights = vec4(0); - vec4 discard_total_weight = vec4(0); - vec4 discard_sum = vec4(0); -#endif - -#if M == 1 // Euclidean medians - vec4 minsum = vec4(0); + val no_weights = val(0); + val discard_total_weight = val(0); + val discard_sum = val(0); #endif FOR_FRAME(r) { // XXX ME is always a frame behind, should have to option to re-research after applying ME (could do it an arbitrary number of times per frame if desired) #if T && ME == 1 // temporal & motion estimation max weight if (r.z > 0) { - me += me_tmp; + me += me_tmp * MEF; me_tmp = vec3(0); maxweight = 0; } #elif T && ME == 2 // temporal & motion estimation weighted average if (r.z > 0) { - me += round(me_sum / me_weight); + me += round(me_sum / me_weight * MEF); me_sum = vec3(0); me_weight = 0; } #endif - FOR_RESEARCH(r) { - // main NLM logic - const float h = S*0.013; - const float pdiff_scale = 1.0/(h*h); - vec4 pdiff_sq = (r.z == 0) ? patch_comparison_gather(r+me, vec3(0)) : patch_comparison(r+me, vec3(0)); - vec4 weight = exp(-pdiff_sq * pdiff_scale); + FOR_RESEARCH(r) { // main NLM logic +#if SKIP_PATCH + val weight = val(1); +#else + val pdiff_sq = (r.z == 0) ? 
val(patch_comparison_gather(r+me, vec3(0))) : patch_comparison(r+me, vec3(0)); + val weight = range(pdiff_sq); +#endif #if T && ME == 1 // temporal & motion estimation max weight me_tmp = vec3(r.xy,0) * step(maxweight, weight.x) + me_tmp * (1 - step(maxweight, weight.x)); @@ -842,18 +964,18 @@ vec4 hook() #endif #if D1W - weight = vec4(weight.x); + weight = val(weight.x); #endif - weight *= exp(-(length(r*SD)*SS * length(r*SD)*SS)); // spatial kernel + weight *= spatial_r(r); -#if WD == 2 || M == 3 // weight discard, weighted median intensity - all_weights[r_index] = weight; - all_pixels[r_index] = load(r+me); +#if WD == 2 // weight discard + all_weights[r_index] = val_pack(weight); + all_pixels[r_index] = val_pack(load(r+me)); r_index++; #elif WD == 1 // weight discard - vec4 wd_scale = 1.0/max(no_weights, 1); - vec4 keeps = step(total_weight*wd_scale * WDT*exp(-wd_scale*WDP), weight); + val wd_scale = 1.0/max(no_weights, 1); + val keeps = step(total_weight*wd_scale * WDT*exp(-wd_scale*WDP), weight); discard_sum += load(r+me) * weight * (1 - keeps); discard_total_weight += weight * (1 - keeps); no_weights += keeps; @@ -861,45 +983,25 @@ vec4 hook() sum += load(r+me) * weight; total_weight += weight; - -#if M == 1 // Euclidean median - // Based on: https://arxiv.org/abs/1207.3056 - // XXX might not work with ME - vec3 r2; - vec4 wpdist_sum = vec4(0); - FOR_FRAME(r2) FOR_RESEARCH(r2) { - vec4 pdist = (r.z + r2.z) == 0 ? patch_comparison_gather(r+me, r2+me) : patch_comparison(r+me, r2+me); - wpdist_sum += sqrt(pdist) * (1-weight); - } - - vec4 newmin = step(wpdist_sum, minsum); // wpdist_sum <= minsum - newmin *= 1 - step(wpdist_sum, vec4(0)); // && wpdist_sum > 0 - newmin += step(minsum, vec4(0)); // || minsum <= 0 - newmin = min(newmin, 1); - - minsum = (newmin * wpdist_sum) + ((1-newmin) * minsum); - result = (newmin * load(r+me)) + ((1-newmin) * result); -#endif } // FOR_RESEARCH } // FOR_FRAME - // XXX optionally put the denoised pixel into the frame buffer? 
-#if T // temporal -#endif - - vec4 avg_weight = total_weight * r_scale; - vec4 old_avg_weight = avg_weight; + val avg_weight = total_weight * r_scale; + val old_avg_weight = avg_weight; #if WD == 2 // true average - total_weight = vec4(0); - sum = vec4(0); - vec4 no_weights = vec4(0); + total_weight = val(0); + sum = val(0); + val no_weights = val(0); for (int i = 0; i < r_area; i++) { - vec4 keeps = step(avg_weight*WDT, all_weights[i]); - all_weights[i] *= keeps; - sum += all_pixels[i] * all_weights[i]; - total_weight += all_weights[i]; + val w = val_unpack(all_weights[i]); + val px = val_unpack(all_pixels[i]); + val keeps = step(avg_weight*WDT, w); + + w *= keeps; + sum += px * w; + total_weight += w; no_weights += keeps; } #elif WD == 1 // moving cumulative average @@ -910,29 +1012,23 @@ vec4 hook() avg_weight = total_weight / no_weights; #endif - total_weight += SW; - sum += poi * SW; + total_weight += SW * spatial_r(vec3(0)); + sum += poi * SW * spatial_r(vec3(0)); -#if M == 3 // weighted median intensity - const float hr_area = r_area/2.0; - vec4 is_median, gt, lt, gte, lte, neq; +#if V == 3 // weight map + result = val(avg_weight); +#else // mean + result = val(sum / total_weight); +#endif - for (int i = 0; i < r_area; i++) { - gt = lt = vec4(0); - for (int j = 0; j < r_area; j++) { - gte = step(all_pixels[i]*all_weights[i], all_pixels[j]*all_weights[j]); - lte = step(all_pixels[j]*all_weights[j], all_pixels[i]*all_weights[i]); - neq = 1 - gte * lte; - gt += gte * neq; - lt += lte * neq; - } - is_median = step(gt, vec4(hr_area)) * step(lt, vec4(hr_area)); - result += step(result, vec4(0)) * is_median * all_pixels[i]; - } -#elif M == 2 // weight map - result = avg_weight; -#elif M == 0 // mean - result = sum / total_weight; + // store frames for temporal +#if T > 1 + +#endif +#if T && TRF + imageStore(PREV1, ivec2(HOOKED_pos*imageSize(PREV1)), unval(result)); +#elif T + imageStore(PREV1, ivec2(HOOKED_pos*imageSize(PREV1)), unval(poi2)); #endif #if ASW == 0 
// pre-WD weights @@ -942,22 +1038,20 @@ vec4 hook() #endif #if ASK == 0 - vec4 sharpening_strength = pow(AS_weight, vec4(ASP)); + val sharpening_strength = pow(AS_weight, val(ASP)); #elif ASK == 1 -#define sigmoid(x) (tanh(x * 2*M_PI - M_PI)*0.5+0.5) - vec4 sharpening_strength = mix(pow(sigmoid(AS_weight), vec4(ASP)), - AS_weight, ASC); - // just in case ASC < 0 (will sharpen but it's janky XXX) - sharpening_strength = clamp(sharpening_strength, 0.0, 1.0); + val sharpening_strength = mix( + pow(smoothstep(0.0, 1.0, AS_weight), val(ASP)), + AS_weight, ASC); + // XXX normalize the result to account for a negative ASC? #elif ASK == 2 - vec4 sharpening_strength = vec4(ASP); + val sharpening_strength = val(ASP); #endif - // XXX maybe allow for alternative blurs? e.g., replace result w/ load2? #if AS == 1 // sharpen+denoise - vec4 sharpened = result + (poi - result) * ASF; + val sharpened = result + (poi - result) * ASF; #elif AS == 2 // sharpen only - vec4 sharpened = poi + (poi - result) * ASF; + val sharpened = poi + (poi - result) * ASF; #endif #if EP // extremes preserve @@ -973,20 +1067,20 @@ vec4 hook() result = mix(sharpened, poi, sharpening_strength); #endif -#if M == 4 // edge map +#if V == 4 // edge map result = sharpening_strength; #endif -#if (M == 2 || M == 4) && defined(CHROMA_raw) // drop chroma for weight maps - result = vec4(0.5); +#if (V == 3 || V == 4) && defined(CHROMA_raw) // drop chroma for these visualizations + return vec4(0.5); #endif -#if DV == 1 - result = clamp(abs(poi - result) * S, 0.0, 1.0); -#elif DV == 2 +#if V == 1 + result = clamp(pow(abs(poi - result), val(0.25)), 0.0, 1.0); +#elif V == 2 result = (poi - result) * 0.5 + 0.5; #endif - return mix(poi, result, BF); + return unval(mix(poi, result, BF)); } diff --git a/portable_config/shaders/nlmeans_temporal.glsl b/portable_config/shaders/nlmeans_temporal.glsl index 01dfdd52..a3bf340d 100644 --- a/portable_config/shaders/nlmeans_temporal.glsl +++ 
b/portable_config/shaders/nlmeans_temporal.glsl @@ -19,7 +19,7 @@ * along with this program. If not, see . */ -// Profile description: Very experimental and buggy, limited to vo=gpu-next. +// Description: nlmeans_temporal.glsl: Very experimental and buggy, limited to vo=gpu-next. /* The recommended usage of this shader and its variant profiles is to add them * to input.conf and then dispatch the appropriate shader via a keybind during @@ -48,8 +48,8 @@ * of noise. * * The denoiser will not work properly if the content has been upscaled - * beforehand, whether it was done by you or someone down the line. Consider - * issuing a command to downscale in the mpv console, like so: + * beforehand (whether it was done by you or not). In such cases, consider + * issuing a command to downscale in the mpv console (backtick ` key): * * vf toggle scale=-2:720 * @@ -65,12 +65,13 @@ * may be different for your system. * * If your GPU doesn't support textureGather, or if you are on a version of mpv - * prior to 0.35.0, then consider setting RI/RFI to 0, or try the LQ and VLQ - * profiles. + * prior to 0.35.0, then consider setting RI/RFI to 0, or try the LQ profile * - * textureGather is LUMA only and limited to the following configurations: + * If you plan on tinkering with NLM's settings, read below: * - * - PS={3,7}:P=3:PST=0:RI={0,1,3}:RFI={0,1,2}:M!=1 + * textureGather only applies to luma and limited to the these configurations: + * + * - PS={3,7}:P=3:PST=0:RI={0,1,3}:RFI={0,1,2} * - Default, very fast, rotations and reflections should be free * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. @@ -83,6 +84,7 @@ * * Options which always disable textureGather: * - PD + * - NG */ // The following is shader code injected from guided.glsl @@ -104,7 +106,7 @@ * along with this program. If not, see . 
*/ -//desc: Guided filter guided by the downscaled image +// Description: guided.glsl: Guided by the downscaled image /* The radius can be adjusted with the MEANI stage's downscaling factor. * Higher numbers give a bigger radius. @@ -120,10 +122,10 @@ //!HOOK LUMA //!HOOK CHROMA -//!DESC Guided filter (PREI) //!BIND HOOKED //!WIDTH HOOKED.w 1.25 / //!HEIGHT HOOKED.h 1.25 / +//!DESC Guided filter (PREI) //!SAVE _INJ_PREI vec4 hook() @@ -133,10 +135,10 @@ vec4 hook() //!HOOK LUMA //!HOOK CHROMA -//!DESC Guided filter (I) //!BIND _INJ_PREI -//!WIDTH HOOKED.w 1.0 / -//!HEIGHT HOOKED.h 1.0 / +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h +//!DESC Guided filter (I) //!SAVE _INJ_I vec4 hook() @@ -144,6 +146,7 @@ vec4 hook() return _INJ_PREI_texOff(0); } + //!HOOK LUMA //!HOOK CHROMA //!DESC Guided filter (P) @@ -310,72 +313,54 @@ vec4 hook() return _INJ_MEANA_texOff(0) * HOOKED_texOff(0) + _INJ_MEANB_texOff(0); } -// End of source code injected from guided.glsl +// End of source code injected from guided.glsl + //!HOOK LUMA //!HOOK CHROMA -//!DESC Non-local means (downscale) -//!WIDTH LUMA.w 3 / -//!HEIGHT LUMA.h 3 / -//!BIND LUMA -//!SAVE EP +//!BIND RF_LUMA +//!WIDTH RF_LUMA.w +//!HEIGHT RF_LUMA.h +//!DESC Non-local means (RF, share) +//!SAVE RF vec4 hook() { - return LUMA_texOff(0); + return RF_LUMA_texOff(0); } //!HOOK LUMA //!HOOK CHROMA -//!DESC Non-local means (share) -//!BIND RF_LUMA -//!SAVE RF +//!BIND LUMA +//!WIDTH LUMA.w 3 / +//!HEIGHT LUMA.h 3 / +//!DESC Non-local means (EP) +//!SAVE EP vec4 hook() { - return RF_LUMA_texOff(0); + return LUMA_texOff(0); } //!HOOK LUMA //!HOOK CHROMA //!BIND HOOKED //!BIND RF_LUMA -//!BIND EP //!BIND RF +//!BIND EP //!BIND PREV1 //!BIND PREV2 -//!BIND PREV3 //!DESC Non-local means (nlmeans_temporal.glsl) -/* User variables - * - * It is usually preferable to denoise chroma and luma differently, so the user - * variables for luma and chroma are split. 
- */ +// User variables -/* S = denoising factor - * P = patch size - * R = research size - * - * The denoising factor controls the level of blur, higher is blurrier. - * - * Patch size should usually be an odd number greater than or equal to 3. - * Higher values are slower and not always better. - * - * Research size usually be an odd number greater than or equal to 3. Higher - * values are usually better, but slower and offer diminishing returns. - * - * Even-numbered patch/research sizes will sample between pixels unless PS=6. - * It's not known whether this is ever useful behavior or not. This is - * incompatible with textureGather optimizations, so NG=1 to disable them. - */ +// It is generally preferable to denoise luma and chroma differently, so the +// user variables for luma and chroma are split. + +// Denoising factor (level of blur, higher means more blur) #ifdef LUMA_raw #define S 2.0 -#define P 3 -#define R 5 #else #define S 5.0 -#define P 3 -#define R 5 #endif /* Adaptive sharpening @@ -383,11 +368,16 @@ vec4 hook() * Uses the blur incurred by denoising to perform an unsharp mask, and uses the * weight map to restrict the sharpening to edges. * - * Use M=4 to get a good look at which areas are/aren't sharpened. + * If you just want to increase/decrease sharpness then you want to change ASF. * - * AS: 2 for sharpening, 1 for sharpening+denoising, 0 to disable - * ASF: Sharpening factor, higher numbers make a sharper underlying image - * ASP: Weight power, higher numbers use more of the sharp image + * Use V=4 to visualize which areas are sharpened (black means sharpen). 
+ * + * AS: + * - 0 to disable + * - 1 to sharpen+denoise + * - 2 to sharpen only + * ASF: Higher numbers make a sharper image + * ASP: Higher numbers use more of the sharp image * ASW: * - 0 to use pre-WD weights * - 1 to use post-WD weights (ASP should be ~2x to compensate) @@ -399,15 +389,15 @@ vec4 hook() */ #ifdef LUMA_raw #define AS 0 -#define ASF 2.0 -#define ASP 4.0 +#define ASF 3.0 +#define ASP 1.0 #define ASW 0 #define ASK 1 #define ASC 0.0 #else #define AS 0 -#define ASF 2.0 -#define ASP 4.0 +#define ASF 3.0 +#define ASP 1.0 #define ASW 0 #define ASK 1 #define ASC 0.0 @@ -415,12 +405,10 @@ vec4 hook() /* Starting weight * - * Lower numbers give less weight to the pixel-of-interest, which may help - * handle higher noise levels, ringing, and may be useful for other things too? + * Also known as the center weight. This represents the weight of the + * pixel-of-interest. Lower numbers may help handle heavy noise & ringing. * - * EPSILON should be used instead of zero to avoid divide-by-zero errors. The - * avg_weight/old_avg_weight variables may be used to make SW adapt to the - * local noise level, e.g., SW=max(avg_weight, EPSILON) + * EPSILON should be used instead of zero to avoid divide-by-zero errors. */ #ifdef LUMA_raw #define SW 1.0 @@ -435,7 +423,7 @@ vec4 hook() * result, especially around edges. * * WD: - * - 2: True average. Very good quality, but slower and uses more memory. + * - 2: True average. Better quality, but slower and requires GLSL 4.0 or later * - 1: Moving cumulative average. Inaccurate, tends to blur directionally. * - 0: Disable * @@ -454,12 +442,14 @@ vec4 hook() /* Extremes preserve * - * Reduces denoising around very bright/dark areas. The downscaling factor of - * EP (located near the top of this shader) controls the area sampled for - * luminance (higher numbers consider more area). + * Reduces denoising around very bright/dark areas. 
+ * + * The downscaling factor of the EP shader stage affects what is considered a + * bright/dark area. The default of 3 should be fine, it's not recommended to + * change this. * * This is incompatible with RGB. If you have RGB hooks enabled then you will - * have to delete the EP shader stage or specify EP=0 through nlmeans_cfg. + * have to delete the EP shader stage or specify EP=0 through shader_cfg. * * EP: 1 to enable, 0 to disable * DP: EP strength on dark patches, 0 to fully denoise @@ -481,25 +471,26 @@ vec4 hook() /* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ /* ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS * ADVANCED OPTIONS */ -/* Robust filtering +/* Patch & research sizes * - * This setting is dependent on code generation from nlmeans_cfg, so this - * setting can only be enabled via nlmeans_cfg. + * Patch size should be an odd number greater than or equal to 3. Higher values + * are slower and not always better. * - * Compares the pixel-of-interest against a guide, which could be a downscaled - * image or the output of another shader such as guided.glsl + * Research size be an odd number greater than or equal to 3. Higher values are + * generally better, but slower, blurrier, and gives diminishing returns. */ #ifdef LUMA_raw -#define RF 1 +#define P 3 +#define R 5 #else -#define RF 1 +#define P 3 +#define R 5 #endif -/* Search shape +/* Patch and research shapes * - * Determines the shape of patches and research zones. Different shapes have - * different speed and quality characteristics. Every shape (besides square) is - * smaller than square. + * Different shapes have different speed and quality characteristics. Every + * shape (besides square) is smaller than square. * * PS applies applies to patches, RS applies to research zones. 
* @@ -522,11 +513,22 @@ vec4 hook() #define PS 3 #endif +/* Robust filtering + * + * This setting is dependent on code generation from shader_cfg, so this + * setting can only be enabled via shader_cfg. + * + * Compares the pixel-of-interest against a guide, which could be a downscaled + * image or the output of another shader + */ +#define RF_LUMA 1 +#define RF 1 + /* Rotational/reflectional invariance * - * Number of rotations/reflections to try for each patch comparison. Slow, but - * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. The most similar rotation/reflection will be used. + * Number of rotations/reflections to try for each patch comparison. Can be + * slow, but improves feature preservation. More rotations/reflections gives + * diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -543,29 +545,39 @@ vec4 hook() #endif /* Temporal denoising + * + * This setting is dependent on code generation from shader_cfg, so this + * setting can only be enabled via shader_cfg. * * Caveats: - * - Slower, each frame needs to be researched - * - Requires vo=gpu-next and nlmeans_temporal.glsl + * - Slower: + * - Each frame needs to be researched (more samples & more math) + * - Gather optimizations only apply to the current frame + * - Requires vo=gpu-next * - Luma-only (this is a bug) * - Buggy * - * Gather samples across multiple frames. May cause motion blur and may - * struggle more with noise that persists across multiple frames (e.g., from - * compression or duplicate frames), but can work very well on high quality - * video. + * May cause motion blur and may struggle more with noise that persists across + * multiple frames (e.g., from compression or duplicate frames), but can work + * very well on high quality video. 
* * Motion estimation (ME) should improve quality without impacting speed. * * T: number of frames used * ME: motion estimation, 0 for none, 1 for max weight, 2 for weighted avg + * MEF: estimate factor, compensates for ME being one frame behind + * TRF: compare against the denoised frames */ #ifdef LUMA_raw #define T 2 #define ME 1 +#define MEF 2 +#define TRF 0 #else #define T 0 #define ME 0 +#define MEF 2 +#define TRF 0 #endif /* Spatial kernel @@ -577,69 +589,79 @@ vec4 hook() * closer/further, for instance SD=(1,1,0.5) would make the temporal axis * appear closer and increase blur between frames. * - * The intra-patch variants do not yet have well-understood effects. They are - * intended to make large patch sizes more useful. Likely slower. + * The intra-patch variants are supposed to help with larger patch sizes. * - * SS: spatial denoising factor + * SST: enables spatial kernel if R>=PST, 0 fully disables + * SS: spatial sigma * SD: spatial distortion (X, Y, time) - * PSS: intra-patch spatial denoising factor + * PSS: intra-patch spatial sigma * PST: enables intra-patch spatial kernel if P>=PST, 0 fully disables * PSD: intra-patch spatial distortion (X, Y) */ #ifdef LUMA_raw +#define SST 1 #define SS 0.25 -#define SD vec3(1,1,1.5) +#define SD vec3(1,1,1) #define PST 0 #define PSS 0.0 #define PSD vec2(1,1) #else +#define SST 1 #define SS 0.25 -#define SD vec3(1,1,1.5) +#define SD vec3(1,1,1) #define PST 0 #define PSS 0.0 #define PSD vec2(1,1) #endif -// Scaling factor (should match WIDTH/HEIGHT) +/* Kernels + * + * SK: spatial kernel + * RK: range kernel (takes patch differences) + * PSK: intra-patch spatial kernel + * + * List of available kernels: + * + * bicubic + * cos + * gaussian + * lanczos + * quadratic + * sinc + * sphinx + */ #ifdef LUMA_raw -#define SF 1 +#define SK gaussian +#define RK gaussian +#define PSK gaussian #else -#define SF 1 +#define SK gaussian +#define RK gaussian +#define PSK gaussian #endif -/* Estimator - * - * 0: means - * 1: 
Euclidean medians (extremely slow, may be good for heavy noise) - * 2: weight map (not a denoiser, maybe useful for generating image masks) - * 3: weighted median intensity (slow, may be good for heavy noise) - * 4: edge map (based on the relevant AS settings) - */ +// Scaling factor (should match WIDTH/HEIGHT) #ifdef LUMA_raw -#define M 0 +#define SF 1 #else -#define M 0 +#define SF 1 #endif -/* Difference visualization - * - * Visualizes the difference between input/output image +/* Visualization * * 0: off - * 1: absolute difference scaled by S - * 2: difference centered on 0.5 + * 1: absolute difference between input/output to the power of 0.25 + * 2: difference between input/output centered on 0.5 + * 3: avg_weight + * 4: edge map (based on the relevant AS settings) */ #ifdef LUMA_raw -#define DV 0 +#define V 0 #else -#define DV 0 +#define V 0 #endif -/* Blur factor - * - * 0 to 1, only useful for alternative estimators. You're probably looking for - * "S" (denoising factor), go back to the top of the shader! - */ +// Blur factor (0.0 returns the input image, 1.0 returns the output image) #ifdef LUMA_raw #define BF 1.0 #else @@ -660,17 +682,57 @@ vec4 hook() #define PD 0 #endif -// Duplicate 1st weight (for LGC) +// Duplicate 1st weight (for luma-guided-chroma) #ifdef LUMA_raw #define D1W 0 #else #define D1W 0 #endif -/* Shader code */ +// Skip patch comparison +#ifdef LUMA_raw +#define SKIP_PATCH 0 +#else +#define SKIP_PATCH 0 +#endif + +// Shader code #define EPSILON 0.00000000001 #define M_PI 3.14159265358979323846 +#define POW2(x) ((x)*(x)) +#define POW3(x) ((x)*(x)*(x)) +#define bicubic(x) ((1.0/6.0) * (POW3((x)+2) - 4 * POW3((x)+1) + 6 * POW3(x) - 4 * POW3(max((x)-1, 0)))) +#define gaussian(x) exp(-1 * POW2(x)) +#define lanczos(x) POW2(sinc(x)) +#define quadratic(x) ((x) < 0.5 ? 0.75 - POW2(x) : 0.5 * POW2((x) - 1.5)) +#define sinc(x) ((x) < 1e-8 ? 1.0 : sin((x)*M_PI) / ((x)*M_PI)) +#define sphinx(x) ((x) < 1e-8 ? 
1.0 : 3.0 * (sin((x)*M_PI) - (x)*M_PI * cos((x)*M_PI)) / POW3((x)*M_PI)) + +// XXX could maybe be better optimized on LGC +// XXX return original alpha component instead of 1.0 +#if defined(LUMA_raw) +#define val float +#define val_swizz(v) (v.x) +#define unval(v) vec4(v.x, 0, 0, 1.0) +#define val_packed val +#define val_pack(v) (v) +#define val_unpack(v) (v) +#elif defined(CHROMA_raw) +#define val vec2 +#define val_swizz(v) (v.xy) +#define unval(v) vec4(v.x, v.y, 0, 1.0) +#define val_packed uint +#define val_pack(v) packUnorm2x16(v) +#define val_unpack(v) unpackUnorm2x16(v) +#else +#define val vec3 +#define val_swizz(v) (v.xyz) +#define unval(v) vec4(v.x, v.y, v.z, 1.0) +#define val_packed val +#define val_pack(v) (v) +#define val_unpack(v) (v) +#endif #if PS == 6 const int hp = P/2; @@ -685,39 +747,96 @@ const float hr = int(R/2) - 0.5*(1-(R%2)); // sample between pixels for even res #endif // donut increment, increments without landing on (0,0,0) -// much faster than a "continue" statement +// much faster than a continue statement #define DINCR(z,c) (z.c++,(z.c += int(z == vec3(0)))) -// search shapes and their corresponding areas -#define S_1X1(z) for (z = vec3(0); z.x <= 0; z.x++) +// patch/research shapes +// each shape is depicted in a comment, where Z=5 (Z corresponds to P or R) +// dots (.) represent samples (pixels) and X represents the pixel-of-interest + +// Z ..... +// Z ..... +// Z ..X.. +// Z ..... +// Z ..... +#define S_SQUARE(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz; z.y <= hz; incr) + +// (in this instance Z=4) +// Z .... +// Z .... +// Z ..X. +// Z .... +#define S_SQUARE_EVEN(z,hz,incr) for (z.x = -hz; z.x < hz; z.x++) for (z.y = -hz; z.y < hz; incr) +// Z-4 . +// Z-2 ... +// Z ..X.. #define S_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz); incr) + +// Z-4 . +// Z-2 ... 
+// hz+1 ..X #define S_TRUNC_TRIANGLE(z,hz,incr) for (z.y = -hz; z.y <= 0; z.y++) for (z.x = -abs(abs(z.y) - hz); z.x <= abs(abs(z.y) - hz)*int(z.y!=0); incr) #define S_TRIANGLE_A(hz,Z) int(hz*hz+Z) +// Z-4 . +// Z-2 ... +// Z ..X.. +// Z-2 ... +// Z-4 . #define S_DIAMOND(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -abs(abs(z.x) - hz); z.y <= abs(abs(z.x) - hz); incr) #define S_DIAMOND_A(hz,Z) int(hz*hz*2+Z) -#define S_VERTICAL(z,hz,incr) for (z.x = 0; z.x <= 0; z.x++) for (z.y = -hz; z.y <= hz; incr) +// +// Z ..X.. +// #define S_HORIZONTAL(z,hz,incr) for (z.x = -hz; z.x <= hz; incr) for (z.y = 0; z.y <= 0; z.y++) +// 90 degree rotation of S_HORIZONTAL +#define S_VERTICAL(z,hz,incr) for (z.x = 0; z.x <= 0; z.x++) for (z.y = -hz; z.y <= hz; incr) + +// 1 . +// 1 . +// Z ..X.. +// 1 . +// 1 . #define S_PLUS(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz * int(z.x == 0); z.y <= hz * int(z.x == 0); incr) #define S_PLUS_A(hz,Z) (Z*2 - 1) -#define S_SQUARE(z,hz,incr) for (z.x = -hz; z.x <= hz; z.x++) for (z.y = -hz; z.y <= hz; incr) -#define S_SQUARE_EVEN(z,hz,incr) for (z.x = -hz; z.x < hz; z.x++) for (z.y = -hz; z.y < hz; incr) +// XXX implement S_PLUS w/ an X overlayed: +// 3 . . . +// 3 ... +// Z ..X.. +// 3 ... +// 3 . . . + +// XXX implement an X shape: +// 2 . . +// 2 . . +// 1 X +// 2 . . +// 2 . . 
+ +// 1x1 square +#define S_1X1(z) for (z = vec3(0); z.x <= 0; z.x++) #define T1 (T+1) #define FOR_FRAME(r) for (r.z = 0; r.z < T1; r.z++) +#ifdef LUMA_raw +#define RF_ RF_LUMA +#else +#define RF_ RF +#endif + // Skip comparing the pixel-of-interest against itself, unless RF is enabled -#if RF +#if RF_ #define RINCR(z,c) (z.c++) #else #define RINCR DINCR #endif -#define R_AREA(a) (a * T1 + RF-1) +#define R_AREA(a) (a * T1 + RF_-1) // research shapes // XXX would be nice to have the option of temporally-varying research sizes @@ -806,50 +925,45 @@ const int p_area = P_AREA(P*P); const float r_scale = 1.0/r_area; const float p_scale = 1.0/p_area; -#define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define sample(tex, pos, size, pt, off) tex(pos + pt * (vec2(off) + 0.5 - fract(pos*size))) +#define load_(off) sample(HOOKED_tex, HOOKED_pos, HOOKED_size, HOOKED_pt, off) -#if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#if RF_ && defined(LUMA_raw) +#define load2_(off) sample(RF_LUMA_tex, RF_LUMA_pos, RF_LUMA_size, RF_LUMA_pt, off) #define gather_offs(off, off_arr) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, off_arr))) #define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off) * RF_LUMA_pt, 0) -#elif RF && D1W -#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) +#elif RF_ && D1W +#define load2_(off) sample(RF_tex, RF_pos, RF_size, RF_pt, off) #define gather_offs(off, off_arr) (RF_mul * vec4(textureGatherOffsets(RF_raw, RF_pos + vec2(off) * RF_pt, off_arr))) #define gather(off) RF_gather(RF_pos + (off) * RF_pt, 0) -#elif RF -#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) +#elif RF_ +#define load2_(off) sample(RF_tex, RF_pos, RF_size, RF_pt, off) #else -#define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) load_(off) #define gather_offs(off, off_arr) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + 
vec2(off) * HOOKED_pt, off_arr))) #define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T -vec4 load(vec3 off) +val load(vec3 off) { - switch (int(off.z)) { - case 0: return load_(off); - case 1: return imageLoad(PREV1, ivec2((HOOKED_pos + HOOKED_pt * vec2(off)) * imageSize(PREV1))); - case 2: return imageLoad(PREV2, ivec2((HOOKED_pos + HOOKED_pt * vec2(off)) * imageSize(PREV2))); - case 3: return imageLoad(PREV3, ivec2((HOOKED_pos + HOOKED_pt * vec2(off)) * imageSize(PREV3))); + switch (min(int(off.z), frame)) { + case 0: return val_swizz(load_(off)); + case 1: return val_swizz(imageLoad(PREV1, ivec2((HOOKED_pos + HOOKED_pt * vec2(off)) * imageSize(PREV1)))); + case 2: return val_swizz(imageLoad(PREV2, ivec2((HOOKED_pos + HOOKED_pt * vec2(off)) * imageSize(PREV2)))); } } -vec4 load2(vec3 off) +val load2(vec3 off) { - switch (int(off.z)) { - case 0: return load2_(off); - case 1: return imageLoad(PREV1, ivec2((HOOKED_pos + HOOKED_pt * vec2(off)) * imageSize(PREV1))); - case 2: return imageLoad(PREV2, ivec2((HOOKED_pos + HOOKED_pt * vec2(off)) * imageSize(PREV2))); - case 3: return imageLoad(PREV3, ivec2((HOOKED_pos + HOOKED_pt * vec2(off)) * imageSize(PREV3))); - } + return off.z == 0 ? 
val_swizz(load2_(off)) : load(off); } #else -#define load(off) load_(off) -#define load2(off) load2_(off) +#define load(off) val_swizz(load_(off)) +#define load2(off) val_swizz(load2_(off)) #endif -vec4 poi = load(vec3(0)); // pixel-of-interest -vec4 poi2 = load2(vec3(0)); // guide pixel-of-interest +val poi = load(vec3(0)); // pixel-of-interest +val poi2 = load2(vec3(0)); // guide pixel-of-interest #if RI // rotation vec2 rot(vec2 p, float d) @@ -876,22 +990,52 @@ vec2 ref(vec2 p, int d) #define ref(p, d) (p) #endif -vec4 patch_comparison(vec3 r, vec3 r2) +#if SST && R >= SST +float spatial_r(vec3 v) +{ + v.xy += 0.5 - fract(HOOKED_pos*HOOKED_size); + return SK(length(v*SD)*SS); +} +#else +#define spatial_r(v) (1) +#endif + +#if PST && P >= PST +#define spatial_p(v) PSK(length(v*PSD)*PSS) +#else +#define spatial_p(v) (1) +#endif + +val range(val pdiff_sq) +{ + const float h = S*0.013; + const float pdiff_scale = 1.0/(h*h); + pdiff_sq = sqrt(pdiff_sq * pdiff_scale); +#if defined(LUMA_raw) + return RK(pdiff_sq); +#elif defined(CHROMA_raw) + return vec2(RK(pdiff_sq.x), RK(pdiff_sq.y)); +#else + return vec3(RK(pdiff_sq.x), RK(pdiff_sq.y), RK(pdiff_sq.z)); +#endif + //return exp(-pdiff_sq * pdiff_scale); + + // weight function from the NLM paper, it's not very good + //return exp(-max(pdiff_sq - 2*S*S, 0.0) * pdiff_scale); +} + +val patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 min_rot = vec4(p_area); + val min_rot = val(p_area); FOR_ROTATION FOR_REFLECTION { - vec4 pdiff_sq = vec4(0); + val pdiff_sq = val(0); FOR_PATCH(p) { vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = load2(p + r2) - load2((transformed_p + r) * SF); + val diff_sq = load2(p + r2) - load2((transformed_p + r) * SF); diff_sq *= diff_sq; -#if PST && P >= PST - float pdist = length(p.xy*PSD)*PSS; - pdist = exp(-(pdist*pdist)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); -#endif + diff_sq = 1 - (1 - diff_sq) * spatial_p(p.xy); pdiff_sq += diff_sq; } min_rot = 
min(min_rot, pdiff_sq); @@ -903,14 +1047,15 @@ vec4 patch_comparison(vec3 r, vec3 r2) #define NO_GATHER (PD == 0 && NG == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#if (defined(LUMA_gather) || D1W) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if (defined(LUMA_gather) || D1W) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather // XXX extend to support arbitrary sizes (probably requires code generation) // XXX extend to support 3x3 square +// XXX support PSS const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; const ivec2 offsets_sf[4] = { ivec2(0,-1) * SF, ivec2(-1,0) * SF, ivec2(0,1) * SF, ivec2(1,0) * SF }; vec4 poi_patch = gather_offs(0, offsets); -vec4 patch_comparison_gather(vec3 r, vec3 r2) +float patch_comparison_gather(vec3 r, vec3 r2) { float min_rot = p_area - 1; vec4 transformer = gather_offs(r, offsets_sf); @@ -934,13 +1079,12 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) } float center_diff_sq = poi2.x - load2(r).x; center_diff_sq *= center_diff_sq; - return vec4(min_rot + center_diff_sq, 0, 0, 0) * p_scale; + return (min_rot + center_diff_sq) * p_scale; } -#elif (defined(LUMA_gather) || D1W) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER +#elif (defined(LUMA_gather) || D1W) && PS == 6 && RI == 0 && RFI == 0 && NO_GATHER // tiled even square patch_comparison_gather // XXX extend to support odd square? 
-// XXX rotations/reflections appear to be subtly broken -vec4 patch_comparison_gather(vec3 r, vec3 r2) +float patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; float min_rot = p_area; @@ -949,40 +1093,17 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) * w z * x y */ - FOR_ROTATION FOR_REFLECTION { - float pdiff_sq = 0; - for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { - vec4 poi_patch = gather(tile + r2.xy); - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - -#if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees -#endif -#if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } -#endif - - vec4 diff_sq = (poi_patch - transformer) * (poi_patch - transformer); -#if PST && P >= PST - // XXX refactor to avoid pow (should probably break off into a function) - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq += dot(diff_sq, vec4(1)); - } - min_rot = min(min_rot, pdiff_sq); + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { + vec4 diff_sq = gather(tile + r.xy) - gather(tile + r2.xy); + diff_sq *= diff_sq; + diff_sq = 1 - (1 - diff_sq) * vec4(spatial_p(tile+vec2(0,1)), spatial_p(tile+vec2(1,1)), + spatial_p(tile+vec2(1,0)), spatial_p(tile+vec2(0,0))); + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); - return vec4(min_rot, 0, 0, 0) * p_scale; + return min_rot * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -990,9 +1111,9 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) vec4 hook() { - vec4 total_weight = vec4(0); - vec4 sum 
= vec4(0); - vec4 result = vec4(0); + val total_weight = val(0); + val sum = val(0); + val result = val(0); vec3 r = vec3(0); vec3 p = vec3(0); @@ -1006,41 +1127,38 @@ vec4 hook() float me_weight = 0; #endif -#if WD == 2 || M == 3 // weight discard, weighted median intensities +#if WD == 2 // weight discard int r_index = 0; - vec4 all_weights[r_area]; - vec4 all_pixels[r_area]; + val_packed all_weights[r_area]; + val_packed all_pixels[r_area]; #elif WD == 1 // weight discard - vec4 no_weights = vec4(0); - vec4 discard_total_weight = vec4(0); - vec4 discard_sum = vec4(0); -#endif - -#if M == 1 // Euclidean medians - vec4 minsum = vec4(0); + val no_weights = val(0); + val discard_total_weight = val(0); + val discard_sum = val(0); #endif FOR_FRAME(r) { // XXX ME is always a frame behind, should have to option to re-research after applying ME (could do it an arbitrary number of times per frame if desired) #if T && ME == 1 // temporal & motion estimation max weight if (r.z > 0) { - me += me_tmp; + me += me_tmp * MEF; me_tmp = vec3(0); maxweight = 0; } #elif T && ME == 2 // temporal & motion estimation weighted average if (r.z > 0) { - me += round(me_sum / me_weight); + me += round(me_sum / me_weight * MEF); me_sum = vec3(0); me_weight = 0; } #endif - FOR_RESEARCH(r) { - // main NLM logic - const float h = S*0.013; - const float pdiff_scale = 1.0/(h*h); - vec4 pdiff_sq = (r.z == 0) ? patch_comparison_gather(r+me, vec3(0)) : patch_comparison(r+me, vec3(0)); - vec4 weight = exp(-pdiff_sq * pdiff_scale); + FOR_RESEARCH(r) { // main NLM logic +#if SKIP_PATCH + val weight = val(1); +#else + val pdiff_sq = (r.z == 0) ? 
val(patch_comparison_gather(r+me, vec3(0))) : patch_comparison(r+me, vec3(0)); + val weight = range(pdiff_sq); +#endif #if T && ME == 1 // temporal & motion estimation max weight me_tmp = vec3(r.xy,0) * step(maxweight, weight.x) + me_tmp * (1 - step(maxweight, weight.x)); @@ -1051,18 +1169,18 @@ vec4 hook() #endif #if D1W - weight = vec4(weight.x); + weight = val(weight.x); #endif - weight *= exp(-(length(r*SD)*SS * length(r*SD)*SS)); // spatial kernel + weight *= spatial_r(r); -#if WD == 2 || M == 3 // weight discard, weighted median intensity - all_weights[r_index] = weight; - all_pixels[r_index] = load(r+me); +#if WD == 2 // weight discard + all_weights[r_index] = val_pack(weight); + all_pixels[r_index] = val_pack(load(r+me)); r_index++; #elif WD == 1 // weight discard - vec4 wd_scale = 1.0/max(no_weights, 1); - vec4 keeps = step(total_weight*wd_scale * WDT*exp(-wd_scale*WDP), weight); + val wd_scale = 1.0/max(no_weights, 1); + val keeps = step(total_weight*wd_scale * WDT*exp(-wd_scale*WDP), weight); discard_sum += load(r+me) * weight * (1 - keeps); discard_total_weight += weight * (1 - keeps); no_weights += keeps; @@ -1070,48 +1188,25 @@ vec4 hook() sum += load(r+me) * weight; total_weight += weight; - -#if M == 1 // Euclidean median - // Based on: https://arxiv.org/abs/1207.3056 - // XXX might not work with ME - vec3 r2; - vec4 wpdist_sum = vec4(0); - FOR_FRAME(r2) FOR_RESEARCH(r2) { - vec4 pdist = (r.z + r2.z) == 0 ? patch_comparison_gather(r+me, r2+me) : patch_comparison(r+me, r2+me); - wpdist_sum += sqrt(pdist) * (1-weight); - } - - vec4 newmin = step(wpdist_sum, minsum); // wpdist_sum <= minsum - newmin *= 1 - step(wpdist_sum, vec4(0)); // && wpdist_sum > 0 - newmin += step(minsum, vec4(0)); // || minsum <= 0 - newmin = min(newmin, 1); - - minsum = (newmin * wpdist_sum) + ((1-newmin) * minsum); - result = (newmin * load(r+me)) + ((1-newmin) * result); -#endif } // FOR_RESEARCH } // FOR_FRAME - // XXX optionally put the denoised pixel into the frame buffer? 
-#if T // temporal - imageStore(PREV3, ivec2(HOOKED_pos*imageSize(PREV3)), load2(vec3(0,0,2))); - imageStore(PREV2, ivec2(HOOKED_pos*imageSize(PREV2)), load2(vec3(0,0,1))); - imageStore(PREV1, ivec2(HOOKED_pos*imageSize(PREV1)), load2(vec3(0,0,0))); -#endif - - vec4 avg_weight = total_weight * r_scale; - vec4 old_avg_weight = avg_weight; + val avg_weight = total_weight * r_scale; + val old_avg_weight = avg_weight; #if WD == 2 // true average - total_weight = vec4(0); - sum = vec4(0); - vec4 no_weights = vec4(0); + total_weight = val(0); + sum = val(0); + val no_weights = val(0); for (int i = 0; i < r_area; i++) { - vec4 keeps = step(avg_weight*WDT, all_weights[i]); - all_weights[i] *= keeps; - sum += all_pixels[i] * all_weights[i]; - total_weight += all_weights[i]; + val w = val_unpack(all_weights[i]); + val px = val_unpack(all_pixels[i]); + val keeps = step(avg_weight*WDT, w); + + w *= keeps; + sum += px * w; + total_weight += w; no_weights += keeps; } #elif WD == 1 // moving cumulative average @@ -1122,29 +1217,23 @@ vec4 hook() avg_weight = total_weight / no_weights; #endif - total_weight += SW; - sum += poi * SW; + total_weight += SW * spatial_r(vec3(0)); + sum += poi * SW * spatial_r(vec3(0)); -#if M == 3 // weighted median intensity - const float hr_area = r_area/2.0; - vec4 is_median, gt, lt, gte, lte, neq; +#if V == 3 // weight map + result = val(avg_weight); +#else // mean + result = val(sum / total_weight); +#endif - for (int i = 0; i < r_area; i++) { - gt = lt = vec4(0); - for (int j = 0; j < r_area; j++) { - gte = step(all_pixels[i]*all_weights[i], all_pixels[j]*all_weights[j]); - lte = step(all_pixels[j]*all_weights[j], all_pixels[i]*all_weights[i]); - neq = 1 - gte * lte; - gt += gte * neq; - lt += lte * neq; - } - is_median = step(gt, vec4(hr_area)) * step(lt, vec4(hr_area)); - result += step(result, vec4(0)) * is_median * all_pixels[i]; - } -#elif M == 2 // weight map - result = avg_weight; -#elif M == 0 // mean - result = sum / total_weight; + // 
store frames for temporal +#if T > 1 + imageStore(PREV2, ivec2(HOOKED_pos*imageSize(PREV2)), unval(load2(vec3(0,0,2-1)))); +#endif +#if T && TRF + imageStore(PREV1, ivec2(HOOKED_pos*imageSize(PREV1)), unval(result)); +#elif T + imageStore(PREV1, ivec2(HOOKED_pos*imageSize(PREV1)), unval(poi2)); #endif #if ASW == 0 // pre-WD weights @@ -1154,22 +1243,20 @@ vec4 hook() #endif #if ASK == 0 - vec4 sharpening_strength = pow(AS_weight, vec4(ASP)); + val sharpening_strength = pow(AS_weight, val(ASP)); #elif ASK == 1 -#define sigmoid(x) (tanh(x * 2*M_PI - M_PI)*0.5+0.5) - vec4 sharpening_strength = mix(pow(sigmoid(AS_weight), vec4(ASP)), - AS_weight, ASC); - // just in case ASC < 0 (will sharpen but it's janky XXX) - sharpening_strength = clamp(sharpening_strength, 0.0, 1.0); + val sharpening_strength = mix( + pow(smoothstep(0.0, 1.0, AS_weight), val(ASP)), + AS_weight, ASC); + // XXX normalize the result to account for a negative ASC? #elif ASK == 2 - vec4 sharpening_strength = vec4(ASP); + val sharpening_strength = val(ASP); #endif - // XXX maybe allow for alternative blurs? e.g., replace result w/ load2? 
#if AS == 1 // sharpen+denoise - vec4 sharpened = result + (poi - result) * ASF; + val sharpened = result + (poi - result) * ASF; #elif AS == 2 // sharpen only - vec4 sharpened = poi + (poi - result) * ASF; + val sharpened = poi + (poi - result) * ASF; #endif #if EP // extremes preserve @@ -1185,35 +1272,29 @@ vec4 hook() result = mix(sharpened, poi, sharpening_strength); #endif -#if M == 4 // edge map +#if V == 4 // edge map result = sharpening_strength; #endif -#if (M == 2 || M == 4) && defined(CHROMA_raw) // drop chroma for weight maps - result = vec4(0.5); +#if (V == 3 || V == 4) && defined(CHROMA_raw) // drop chroma for these visualizations + return vec4(0.5); #endif -#if DV == 1 - result = clamp(abs(poi - result) * S, 0.0, 1.0); -#elif DV == 2 +#if V == 1 + result = clamp(pow(abs(poi - result), val(0.25)), 0.0, 1.0); +#elif V == 2 result = (poi - result) * 0.5 + 0.5; #endif - return mix(poi, result, BF); + return unval(mix(poi, result, BF)); } //!TEXTURE PREV1 //!SIZE 1920 1080 -//!FORMAT r32f +//!FORMAT r16f //!STORAGE //!TEXTURE PREV2 //!SIZE 1920 1080 -//!FORMAT r32f +//!FORMAT r16f //!STORAGE - -//!TEXTURE PREV3 -//!SIZE 1920 1080 -//!FORMAT r32f -//!STORAGE -