From 6a5582c7e37dfed86ae5fed7ba30bfe2df8ae522 Mon Sep 17 00:00:00 2001 From: CyberLeo Date: Thu, 7 Apr 2016 03:11:35 -0500 Subject: [PATCH] Change to accept URL instead of post ID and glean profile from that --- danbooru | 1 - e621 | 1 - fetch | 55 +++++++++++++------------ gelbooru | 1 - lib/profiles/danbooru | 14 +++++-- lib/profiles/e621 | 16 ++++++-- lib/profiles/gelbooru | 16 ++++++-- lib/profiles/safebooru | 14 +++++-- lib/profiles/wildcritters | 14 +++++-- lib/taggery.sh | 86 +++++++++++++++++++++++++++++++-------- safebooru | 1 - wildcritters | 1 - 12 files changed, 153 insertions(+), 67 deletions(-) delete mode 120000 danbooru delete mode 120000 e621 delete mode 120000 gelbooru delete mode 120000 safebooru delete mode 120000 wildcritters diff --git a/danbooru b/danbooru deleted file mode 120000 index ef77e7f..0000000 --- a/danbooru +++ /dev/null @@ -1 +0,0 @@ -fetch \ No newline at end of file diff --git a/e621 b/e621 deleted file mode 120000 index ef77e7f..0000000 --- a/e621 +++ /dev/null @@ -1 +0,0 @@ -fetch \ No newline at end of file diff --git a/fetch b/fetch index 7357773..028d9e8 100755 --- a/fetch +++ b/fetch @@ -5,7 +5,7 @@ pebkac() { cat < -Given an ID number from the site: +Given a post URL number from the site: * downloads and stashes the full-size image in the md5 directory * adds whatever tags are found into the tag area @@ -32,29 +32,37 @@ done shift $(( $OPTIND - 1 )) [ "${*}" ] || pebkac -id="${1}" -[ -z "$(echo "${id}" | tr -d '[0-9]')" ] || pebkac "ID should be an integer" +url="${1}" +#[ -z "$(echo "${id}" | tr -d '[0-9]')" ] || pebkac "ID should be an integer" -taggery_name="$(basename "${0}")" -taggery_profile="$(dirname "${0}")/lib/profiles/${taggery_name}" -[ -f "${taggery_profile}" ] || pebkac "Unsupported profile: ${taggery_name}" -. "${taggery_profile}" +taggery_base="$(dirname "${0}")" +taggery_libs="$(dirname "${0}")/lib" + +# Try and find the profile that can handle the url +profiles="$(cd "${taggery_libs}/profiles"; ls -1)" +for profile in ${profiles} +do + . "${taggery_libs}/profiles/${profile}" + "${profile}_can_handle" "${url}" || continue + taggery_name="${profile}" + break +done # Set up temporary area -mkdir -p "$(dirname "${0}")/tmp" -TMPDIR="$(dirname "${0}")/tmp" +#mkdir -p "$(dirname "${0}")/tmp" +#TMPDIR="$(dirname "${0}")/tmp" # Load KVS -kvs="${TMPDIR}/.${taggery_name}.kvdb" -. "$(dirname "${0}")/lib/kvs.sh" +#kvs="${TMPDIR}/.${taggery_name}.kvdb" +#. "$(dirname "${0}")/lib/kvs.sh" # Load Taggery -. "$(dirname "${0}")/lib/taggery.sh" +. "${taggery_libs}/taggery.sh" +url_id() { taggery_id "${@}"; } image_url() { taggery_image_url "${@}"; } image_tags() { taggery_image_tags "${@}"; } -cd "$(dirname "${0}")" - +id="$(url_id "${url}")" file="$(image_url "${id}")" tags="$(image_tags "${id}")" @@ -72,35 +80,28 @@ extn="${name#*.}" unset nabbed if [ "${file}" ] then - if [ -e "md5/${name}" ] + if [ -e "${taggery_base}/md5/${name}" ] then - echo "Filename md5/${name} already exists" + echo "Filename ${name} already exists" else - referer="$(taggery_image_referer "${id}")" - wget ${referer:+--referer="${referer}"} -O "md5/${name}" "${file}" + taggery_fetch_image "${id}" nabbed=NABBED fi fi if [ "${nabbed}" -o "${tags}" ] then - for tag in ${tags} - do - echo ${tag} - tag="$(echo "${tag}" | sed -e 's/\//%47/g; s/[\]/\\&/g')" - mkdir -p "tag/${tag}" - ln -sf "../../md5/${name}" "tag/${tag}/" - done + taggery_link_tags "${id}" fi # Even if we didn't download it, try to link it into the pool [ "${pool}" ] && { - plnm="pool/${pool}/${indx}.${extn}" + plnm="${taggery_base}/pool/${pool}/${indx}.${extn}" if [ -e "${plnm}" ] then echo "Filename ${plnm} already exists" else - mkdir -p "pool/${pool}" + mkdir -p "${taggery_base}/pool/${pool}" ln -svf "../../md5/${name}" "${plnm}" fi } diff --git a/gelbooru b/gelbooru deleted file mode 120000 index ef77e7f..0000000 --- a/gelbooru +++ /dev/null @@ -1 +0,0 @@ -fetch \ No newline at end of file diff --git a/lib/profiles/danbooru b/lib/profiles/danbooru index c82d999..5c9ad1f 100644 --- a/lib/profiles/danbooru +++ b/lib/profiles/danbooru @@ -1,9 +1,17 @@ +danbooru_can_handle() { + local url="${1}" + echo "${url}" | grep -qi 'danbooru.donmai.us/posts/[0-9]*' || return 1 + taggery_id_filter=danbooru_id_filter + taggery_fmturl="https://danbooru.donmai.us/posts/%s" + taggery_image_url_filter=danbooru_image_url_filter + taggery_image_tags_filter=danbooru_image_tags_filter +} +danbooru_id_filter() { + sed -e 's#^.*/posts/\([0-9]\+\)\($\|/.*$\)#\1#' +} danbooru_image_url_filter() { sed -e '/Size: /!d; s/^.*/&\n/g' | sed -e '/class="search-tag/!d; s/\([^?]\)<\/a>.*$/\1/; s/^.*>//; s/ /_/g' } -taggery_fmturl="http://danbooru.donmai.us/posts/%s" -taggery_image_url_filter=danbooru_image_url_filter -taggery_image_tags_filter=danbooru_image_tags_filter diff --git a/lib/profiles/e621 b/lib/profiles/e621 index 69cbb53..01066bc 100644 --- a/lib/profiles/e621 +++ b/lib/profiles/e621 @@ -1,9 +1,17 @@ +e621_can_handle() { + local url="${1}" + echo "${url}" | grep -qi 'e621.net/post/show/[0-9]*' || return 1 + taggery_id_filter=e621_id_filter + taggery_fmturl="https://e621.net/post/show/%s" + taggery_image_url_filter=e621_image_url_filter + taggery_image_tags_filter=e621_image_tags_filter +} +e621_id_filter() { + sed -e 's#^.*/post/show/\([0-9]\+\)\($\|/.*$\)#\1#' +} e621_image_url_filter() { - sed -e '/Size: /!d; s/^.*.*$//; s/^.*href="\([^"]*\)".*$/\1/; /:\/\//!s/^/http:\/\/e621.net/' + sed -e '/Size: /!d; s/^.*.*$//; s/^.*href="\([^"]*\)".*$/\1/; /:\/\//!s/^/https:\/\/e621.net/' } e621_image_tags_filter() { sed -e 's/<\/li>/&\n/g' | sed -e '/id="tag-sidebar"/,/<\/ul>/!d; /class="tag-type-/!d; s/\([^?]\)<\/a>.*$/\1/; s/^.*>//; s/ /_/g' } -taggery_fmturl="http://e621.net/post/show/%s" -taggery_image_url_filter=e621_image_url_filter -taggery_image_tags_filter=e621_image_tags_filter diff --git a/lib/profiles/gelbooru b/lib/profiles/gelbooru index 8c52c26..4c2b26a 100644 --- a/lib/profiles/gelbooru +++ b/lib/profiles/gelbooru @@ -1,3 +1,15 @@ +gelbooru_can_handle() { + local url="${1}" + echo "${url}" | grep -qi 'gelbooru.com/.*id=[0-9]*' || return 1 + taggery_id_filter=gelbooru_id_filter + taggery_fmturl="http://gelbooru.com/index.php?page=post&s=view&id=%s" + taggery_image_referer=gelbooru_image_referer + taggery_image_url_filter=gelbooru_image_url_filter + taggery_image_tags_filter=gelbooru_image_tags_filter +} +gelbooru_id_filter() { + sed -e 's#^.*id=\([0-9]\+\)\($\|&.*$\)#\1#' +} gelbooru_image_referer() { echo "http://www.gelbooru.com/index.php?page=post&s=view&id=${1}" } @@ -7,7 +19,3 @@ gelbooru_image_url_filter() { gelbooru_image_tags_filter() { sed -e '/id="tag-sidebar"/!d; s/<\/li>/&\n/g' | sed -e '/class="tag-type-/!d; s/\([^?]\)<\/a>.*$/\1/; s/^.*>//; s/ /_/g' } -taggery_fmturl="http://gelbooru.com/index.php?page=post&s=view&id=%s" -taggery_image_referer=gelbooru_image_referer -taggery_image_url_filter=gelbooru_image_url_filter -taggery_image_tags_filter=gelbooru_image_tags_filter diff --git a/lib/profiles/safebooru b/lib/profiles/safebooru index 8bc7411..00bc2af 100644 --- a/lib/profiles/safebooru +++ b/lib/profiles/safebooru @@ -1,9 +1,17 @@ +safebooru_can_handle() { + local url="${1}" + echo "${url}" | grep -qi 'safebooru.org/.*id=[0-9]*' || return 1 + taggery_id_filter=safebooru_id_filter + taggery_fmturl="http://safebooru.org/index.php?page=post&s=view&id=%s" + taggery_image_url_filter=safebooru_image_url_filter + taggery_image_tags_filter=safebooru_image_tags_filter +} +safebooru_id_filter() { + sed -e 's#^.*id=\([0-9]\+\)\($\|&.*$\)#\1#' +} safebooru_image_url_filter() { sed -e '/>Original imageOriginal image.*//i; s/^.*/&\n/gi' | sed -e 's/<\/a>.*$//; s/^.*>//; /^[[:space:]]*$/d; s/ /_/g' } -taggery_fmturl="http://safebooru.org/index.php?page=post&s=view&id=%s" -taggery_image_url_filter=safebooru_image_url_filter -taggery_image_tags_filter=safebooru_image_tags_filter diff --git a/lib/profiles/wildcritters b/lib/profiles/wildcritters index e483579..8034439 100644 --- a/lib/profiles/wildcritters +++ b/lib/profiles/wildcritters @@ -1,9 +1,17 @@ +wildcritters_can_handle() { + local url="${1}" + echo "${url}" | grep -qi 'wildcritters.ws/post/show/[0-9]*' || return 1 + taggery_id_filter=wildcritters_id_filter + taggery_fmturl="http://wildcritters.ws/post/show/%s" + taggery_image_url_filter=wildcritters_image_url_filter + taggery_image_tags_filter=wildcritters_image_tags_filter +} +wildcritters_id_filter() { + sed -e 's#^.*/post/show/\([0-9]\+\)\($\|/.*$\)#\1#' +} wildcritters_image_url_filter() { sed -e '/Size: /!d; s/^.*.*$//; s/^.*href="\([^"]*\)".*$/\1/; /:\/\//!s/^/http:\/\/wildcritters.ws/' } wildcritters_image_tags_filter() { sed -e '/id="tag-sidebar"/,/<\/ul>/!d; /class="tag-type-/!d; s/\([^?]\)<\/a>.*$/\1/; s/^.*>//; s/ /_/g' } -taggery_fmturl="http://wildcritters.ws/post/show/%s" -taggery_image_url_filter=wildcritters_image_url_filter -taggery_image_tags_filter=wildcritters_image_tags_filter diff --git a/lib/taggery.sh b/lib/taggery.sh index 84ffecc..3f873e6 100644 --- a/lib/taggery.sh +++ b/lib/taggery.sh @@ -5,15 +5,24 @@ Copyright - http://wiki.cyberleo.net/wiki/CyberLeo/COPYRIGHT?version=4 Set the following variables before sourcing this library: + taggery_base - The base directory where the taggery database + resides. This should contain the directories + md5/ tag/ pool/ + taggery_libs - The base directory where the code resides. + +The following should be defined by the profile: + taggery_name - A string token to use when formatting things taggery_fmturl - A printf-compatible format string that will - transform an ID into the URL of a 'view' page + transform an ID into the URL of a 'view' page. taggery_image_url_filter - A program or function that takes a webpage on stdin and provides on stdout the URL to the raw image to be downloaded. taggery_image_tags_filter - A program or function that takes a webpage on stdin and provides on stdout a list of newline- delimited tags. + taggery_image_referer - A program or function that takes an id and + returns the referer URL for that id. Examples: Format URL: @@ -29,7 +38,7 @@ EOF } taggery_cleanup() { - local cleanup="$(kvs_get clean up)" + local cleanup="$(kvs_get "${taggery_name}" cleanup)" if [ "${cleanup}" ] then local id @@ -38,50 +47,56 @@ taggery_cleanup() { taggery_free_page "${id}" done fi - kvs_unset clean up + kvs_unset "${taggery_name}" cleanup } trap "taggery_cleanup" EXIT HUP INT TERM KILL taggery_register_cleanup() { [ "${1}" ] || return 1 local id="${1}" - kvs_set clean up "$(kvs_get clean up) ${id}" + kvs_set "${taggery_name}" cleanup "$(kvs_get "${taggery_name}" cleanup) ${id}" } taggery_init_page() { [ "${1}" ] || return 1 local id="${1}" - kvs_set "${id}" tmp "$(mktemp --tmpdir ".taggery_${id}.XXXXXXXX")" - kvs_set "${id}" url "$(printf "${taggery_fmturl}" "${id}")" + kvs_set "${taggery_name}/${id}" tmp "$(mktemp "${taggery_temp}/.taggery_${id}.XXXXXXXX")" + kvs_set "${taggery_name}/${id}" url "$(printf "${taggery_fmturl}" "${id}")" taggery_register_cleanup "${id}" } taggery_free_page() { [ "${1}" ] || return 1 local id="${1}" - local tmp="$(kvs_get "${id}" tmp)" - [ -f "${tmp}" -a "${tmp%%_*}" = ".taggery" ] && rm -f "${tmp}" - kvs_unset_all "${id}" + local tmp="$(kvs_get "${taggery_name}/${id}" tmp)" + [ -f "${tmp}" -a "${tmp%%_*}" = "${taggery_temp}/.taggery" ] && rm -f "${tmp}" + kvs_unset_all "${taggery_name}/${id}" } -taggery_fetch() { +taggery_fetch_page() { [ "${1}" ] || return 1 local id="${1}" - kvs_has_id "${id}" || taggery_init_page "${id}" - local url="$(kvs_get "${id}" url)" - local tmp="$(kvs_get "${id}" tmp)" + kvs_has_id "${taggery_name}/${id}" || taggery_init_page "${id}" + local url="$(kvs_get "${taggery_name}/${id}" url)" + local tmp="$(kvs_get "${taggery_name}/${id}" tmp)" wget -qO "${tmp}" "${url}" } taggery_page() { [ "${1}" ] || return 1 local id="${1}" - kvs_has_id "${id}" || taggery_init_page "${id}" - local tmp="$(kvs_get "${id}" tmp)" - [ -s "${tmp}" ] || taggery_fetch "${id}" + kvs_has_id "${taggery_name}/${id}" || taggery_init_page "${id}" + local tmp="$(kvs_get "${taggery_name}/${id}" tmp)" + [ -s "${tmp}" ] || taggery_fetch_page "${id}" cat "${tmp}" } +taggery_id() { + [ "${1}" ] || return 1 + local url="${1}" + echo "${url}" | "${taggery_id_filter}" +} + taggery_image_url() { [ "${1}" ] || return 1 local id="${1}" @@ -92,12 +107,47 @@ taggery_image_referer() { [ "${1}" ] || return 1 local id="${1}" [ "${taggery_image_referer}" ] || return 0 - "${taggery_image_referer}" "${1}" + "${taggery_image_referer}" "${id}" +} + +taggery_fetch_image() { + [ "${1}" ] || return 1 + local id="${1}" + file="$(taggery_image_url "${id}")" + name="$(basename "${file}")" + referer="$(taggery_image_referer "${id}")" + wget ${referer:+--referer="${referer}"} -O "${taggery_base}/md5/${name}" "${file}" } taggery_image_tags() { [ "${1}" ] || return 1 local id="${1}" - echo "$(basename "${taggery_profile}")" + echo "${taggery_name}" taggery_page "${id}" | "${taggery_image_tags_filter}" } + +taggery_sanitize_tag() { + [ "${1}" ] || return 1 + local tag="${1}" + echo "${tag}" | sed -e 's/\//%47/g; s/[\]/\\&/g' +} + +taggery_link_tags() { + [ "${1}" ] || return 1 + local id="${1}" + taggery_image_tags "${id}" | while read tag + do + [ "${tag}" ] || continue + echo "${tag}" + tag="$(taggery_sanitize_tag "${tag}")" + mkdir -p "${taggery_base}/tag/${tag}" + ln -sf "../../md5/${name}" "${taggery_base}/tag/${tag}/" + done +} + +: ${taggery_temp:=${taggery_base}/tmp} + +mkdir -p "${taggery_temp}" "${taggery_base}/md5" "${taggery_base}/tag" "${taggery_base}/pool" +TMPDIR="${taggery_temp}" +kvs="${taggery_temp}/taggery.kvs" +. "${taggery_libs}/kvs.sh" \ No newline at end of file diff --git a/safebooru b/safebooru deleted file mode 120000 index ef77e7f..0000000 --- a/safebooru +++ /dev/null @@ -1 +0,0 @@ -fetch \ No newline at end of file diff --git a/wildcritters b/wildcritters deleted file mode 120000 index ef77e7f..0000000 --- a/wildcritters +++ /dev/null @@ -1 +0,0 @@ -fetch \ No newline at end of file -- 2.42.0