From 6b33b8b4c09d3f33524a1b89df5dc8e01d12efbe Mon Sep 17 00:00:00 2001 From: DL6ER Date: Sun, 27 Dec 2020 19:14:52 +0100 Subject: Store status of downloaded list (downloaded, using cache, some error, ...) and number of (in-)valid domains on this list in the gravity database. This updates the gravity database to version 14. Signed-off-by: DL6ER --- gravity.sh | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 7 deletions(-) (limited to 'gravity.sh') diff --git a/gravity.sh b/gravity.sh index 5a831dae..b238275f 100755 --- a/gravity.sh +++ b/gravity.sh @@ -217,6 +217,48 @@ database_adlist_updated() { fi } +# Check if a column with name ${2} exists in gravity table with name ${1} +gravity_column_exists() { + output=$( { printf ".timeout 30000\\nSELECT EXISTS(SELECT * FROM pragma_table_info('%s') WHERE name='%s');\\n" "${1}" "${2}" | sqlite3 "${gravityDBfile}"; } 2>&1 ) + if [[ "${output}" == "1" ]]; then + return 0 # Bash 0 is success + fi + + return 1 # Bash non-0 is failure +} + +# Update number of domain on this list. We store this in the "old" database as all values in the new database will later be overwritten +database_adlist_number() { + # Only try to set number of domains when this field exists in the gravity database + if ! gravity_column_exists "adlist" "number"; then + return; + fi + + output=$( { printf ".timeout 30000\\nUPDATE adlist SET number = %i, invalid_domains = %i WHERE id = %i;\\n" "${total_num}" "${invalid_num}" "${1}" | sqlite3 "${gravityDBfile}"; } 2>&1 ) + status="$?" + + if [[ "${status}" -ne 0 ]]; then + echo -e "\\n ${CROSS} Unable to update number of domains in adlist with ID ${1} in database ${gravityDBfile}\\n ${output}" + gravity_Cleanup "error" + fi +} + +# Update status of this list. 
We store this in the "old" database as all values in the new database will later be overwritten +database_adlist_status() { + # Only try to set the status when this field exists in the gravity database + if ! gravity_column_exists "adlist" "status"; then + return; + fi + + output=$( { printf ".timeout 30000\\nUPDATE adlist SET status = %i WHERE id = %i;\\n" "${2}" "${1}" | sqlite3 "${gravityDBfile}"; } 2>&1 ) + status="$?" + + if [[ "${status}" -ne 0 ]]; then + echo -e "\\n ${CROSS} Unable to update status of adlist with ID ${1} in database ${gravityDBfile}\\n ${output}" + gravity_Cleanup "error" + fi +} + # Migrate pre-v5.0 list files to database-based Pi-hole versions migrate_to_database() { # Create database file only if not present @@ -439,6 +481,7 @@ gravity_DownloadBlocklists() { } total_num=0 +invalid_num=0 parseList() { local adlistID="${1}" src="${2}" target="${3}" incorrect_lines # This sed does the following things: @@ -456,11 +499,11 @@ parseList() { num_target_lines="$(grep -c "^" "${target}")" num_correct_lines="$(( num_target_lines-total_num ))" total_num="$num_target_lines" - num_invalid="$(( num_lines-num_correct_lines ))" - if [[ "${num_invalid}" -eq 0 ]]; then - echo " ${INFO} Received ${num_lines} domains" + invalid_num="$(( num_lines-num_correct_lines ))" + if [[ "${invalid_num}" -eq 0 ]]; then + echo " ${INFO} Analyzed ${num_lines} domains" else - echo " ${INFO} Received ${num_lines} domains, ${num_invalid} domains invalid!" + echo " ${INFO} Analyzed ${num_lines} domains, ${invalid_num} domains invalid!" 
fi # Display sample of invalid lines if we found some @@ -554,31 +597,48 @@ gravity_DownloadBlocklistFromUrl() { esac;; esac + local done="false" # Determine if the blocklist was downloaded and saved correctly if [[ "${success}" == true ]]; then if [[ "${httpCode}" == "304" ]]; then # Add domains to database table file parseList "${adlistID}" "${saveLocation}" "${target}" + database_adlist_status "${adlistID}" "2" + database_adlist_number "${adlistID}" + done="true" # Check if $patternbuffer is a non-zero length file elif [[ -s "${patternBuffer}" ]]; then # Determine if blocklist is non-standard and parse as appropriate gravity_ParseFileIntoDomains "${patternBuffer}" "${saveLocation}" # Add domains to database table file parseList "${adlistID}" "${saveLocation}" "${target}" - # Update date_updated field in gravity database table + # Update gravity database table + database_adlist_status "${adlistID}" "1" database_adlist_updated "${adlistID}" + database_adlist_number "${adlistID}" + done="true" else # Fall back to previously cached list if $patternBuffer is empty - echo -e " ${INFO} Received empty file: ${COL_LIGHT_GREEN}using previously cached list${COL_NC}" + echo -e " ${INFO} Received empty file" fi - else + fi + + # Do we need to fall back to a cached list (if available)? 
+ if [[ "${done}" != "true" ]]; then # Determine if cached list has read permission if [[ -r "${saveLocation}" ]]; then echo -e " ${CROSS} List download failed: ${COL_LIGHT_GREEN}using previously cached list${COL_NC}" # Add domains to database table file parseList "${adlistID}" "${saveLocation}" "${target}" + database_adlist_number "${adlistID}" + database_adlist_status "${adlistID}" "3" else echo -e " ${CROSS} List download failed: ${COL_LIGHT_RED}no cached list available${COL_NC}" + # Total number == -1 means there was no cached list that could have been used + total_num=-1 + invalid_num=0 + database_adlist_number "${adlistID}" + database_adlist_status "${adlistID}" "4" fi fi } -- cgit v1.2.3 From 8c56f54a1e4a0ad659914471f8aeb8d92b259956 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Tue, 29 Dec 2020 09:54:25 +0100 Subject: Compare checksum of downloaded list against older checksums to see if the list content changed since the last download Signed-off-by: DL6ER --- gravity.sh | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'gravity.sh') diff --git a/gravity.sh b/gravity.sh index b238275f..1defa06f 100755 --- a/gravity.sh +++ b/gravity.sh @@ -514,6 +514,27 @@ parseList() { done <<< "${incorrect_lines}" fi } +compareLists() { + local adlistID="${1}" target="${2}" result + + # Verify checksum when an older checksum exists + if [[ -s "${target}.sha1" ]]; then + if ! 
sha1sum --check --status --strict "${target}.sha1"; then + # The list changed upstream, we need to update the checksum + sha1sum "${target}" > "${target}.sha1" + echo " ${INFO} List has been updated" + database_adlist_status "${adlistID}" "1" + else + echo " ${INFO} List stayed unchanged" + database_adlist_status "${adlistID}" "2" + fi + else + # No checksum available, create one for comparing on the next run + sha1sum "${target}" > "${target}.sha1" + # We assume here it was changed upstream + database_adlist_status "${adlistID}" "1" + fi +} # Download specified URL and perform checks on HTTP status and file content gravity_DownloadBlocklistFromUrl() { @@ -612,8 +633,9 @@ gravity_DownloadBlocklistFromUrl() { gravity_ParseFileIntoDomains "${patternBuffer}" "${saveLocation}" # Add domains to database table file parseList "${adlistID}" "${saveLocation}" "${target}" - # Update gravity database table - database_adlist_status "${adlistID}" "1" + # Compare lists, are they identical? + compareLists "${adlistID}" "${saveLocation}" + # Update gravity database table (status is set in compareLists) database_adlist_updated "${adlistID}" database_adlist_number "${adlistID}" done="true" -- cgit v1.2.3 From a216848c1db65dc1be5a1928e3f225c7f10be694 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Tue, 29 Dec 2020 20:28:09 +0100 Subject: Only update time of last list change when we see a list for the first time or when it really changed content Signed-off-by: DL6ER --- gravity.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'gravity.sh') diff --git a/gravity.sh b/gravity.sh index 1defa06f..0119b710 100755 --- a/gravity.sh +++ b/gravity.sh @@ -524,6 +524,7 @@ compareLists() { sha1sum "${target}" > "${target}.sha1" echo " ${INFO} List has been updated" database_adlist_status "${adlistID}" "1" + database_adlist_updated "${adlistID}" else echo " ${INFO} List stayed unchanged" database_adlist_status "${adlistID}" "2" @@ -531,8 +532,10 @@ compareLists() { else # No 
checksum available, create one for comparing on the next run sha1sum "${target}" > "${target}.sha1" + echo " ${INFO} This list is new" # We assume here it was changed upstream database_adlist_status "${adlistID}" "1" + database_adlist_updated "${adlistID}" fi } @@ -635,8 +638,8 @@ gravity_DownloadBlocklistFromUrl() { parseList "${adlistID}" "${saveLocation}" "${target}" # Compare lists, are they identical? compareLists "${adlistID}" "${saveLocation}" - # Update gravity database table (status is set in compareLists) - database_adlist_updated "${adlistID}" + # Update gravity database table (status and updated timestamp are set in + # compareLists) database_adlist_number "${adlistID}" done="true" else -- cgit v1.2.3 From 0944807491ca40009eabe47c5198907b518100c4 Mon Sep 17 00:00:00 2001 From: DL6ER Date: Tue, 29 Dec 2020 20:35:48 +0100 Subject: Actually store correct number of domains for the individual lists (and not the sum of the so far collected number of domains) Signed-off-by: DL6ER --- gravity.sh | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'gravity.sh') diff --git a/gravity.sh b/gravity.sh index 0119b710..684d64b3 100755 --- a/gravity.sh +++ b/gravity.sh @@ -234,7 +234,7 @@ database_adlist_number() { return; fi - output=$( { printf ".timeout 30000\\nUPDATE adlist SET number = %i, invalid_domains = %i WHERE id = %i;\\n" "${total_num}" "${invalid_num}" "${1}" | sqlite3 "${gravityDBfile}"; } 2>&1 ) + output=$( { printf ".timeout 30000\\nUPDATE adlist SET number = %i, invalid_domains = %i WHERE id = %i;\\n" "${num_lines}" "${num_invalid}" "${1}" | sqlite3 "${gravityDBfile}"; } 2>&1 ) status="$?" 
if [[ "${status}" -ne 0 ]]; then @@ -481,7 +481,8 @@ gravity_DownloadBlocklists() { } total_num=0 -invalid_num=0 +num_lines=0 +num_invalid=0 parseList() { local adlistID="${1}" src="${2}" target="${3}" incorrect_lines # This sed does the following things: @@ -492,18 +493,18 @@ parseList() { # Find (up to) five domains containing invalid characters (see above) incorrect_lines="$(sed -e "/[^a-zA-Z0-9.\_-]/!d" "${src}" | head -n 5)" - local num_lines num_target_lines num_correct_lines num_invalid + local num_target_lines num_correct_lines num_invalid # Get number of lines in source file num_lines="$(grep -c "^" "${src}")" # Get number of lines in destination file num_target_lines="$(grep -c "^" "${target}")" num_correct_lines="$(( num_target_lines-total_num ))" total_num="$num_target_lines" - invalid_num="$(( num_lines-num_correct_lines ))" - if [[ "${invalid_num}" -eq 0 ]]; then + num_invalid="$(( num_lines-num_correct_lines ))" + if [[ "${num_invalid}" -eq 0 ]]; then echo " ${INFO} Analyzed ${num_lines} domains" else - echo " ${INFO} Analyzed ${num_lines} domains, ${invalid_num} domains invalid!" + echo " ${INFO} Analyzed ${num_lines} domains, ${num_invalid} domains invalid!" 
fi # Display sample of invalid lines if we found some @@ -515,7 +516,7 @@ parseList() { fi } compareLists() { - local adlistID="${1}" target="${2}" result + local adlistID="${1}" target="${2}" # Verify checksum when an older checksum exists if [[ -s "${target}.sha1" ]]; then @@ -659,9 +660,9 @@ gravity_DownloadBlocklistFromUrl() { database_adlist_status "${adlistID}" "3" else echo -e " ${CROSS} List download failed: ${COL_LIGHT_RED}no cached list available${COL_NC}" - # Total number == -1 means there was no cached list that could have been used - total_num=-1 - invalid_num=0 + # Manually reset these two numbers because we do not call parseList here + num_lines=0 + num_invalid=0 database_adlist_number "${adlistID}" database_adlist_status "${adlistID}" "4" fi -- cgit v1.2.3 From a2625df5e2a7e406cadcd430ea3902c47234769c Mon Sep 17 00:00:00 2001 From: DL6ER Date: Wed, 30 Dec 2020 11:27:34 +0100 Subject: Remove "The list is new" comment because it is superfluous Signed-off-by: DL6ER --- gravity.sh | 1 - 1 file changed, 1 deletion(-) (limited to 'gravity.sh') diff --git a/gravity.sh b/gravity.sh index 684d64b3..0dd65bfb 100755 --- a/gravity.sh +++ b/gravity.sh @@ -533,7 +533,6 @@ compareLists() { else # No checksum available, create one for comparing on the next run sha1sum "${target}" > "${target}.sha1" - echo " ${INFO} This list is new" # We assume here it was changed upstream database_adlist_status "${adlistID}" "1" database_adlist_updated "${adlistID}" -- cgit v1.2.3