First Github Actions build.yml attempt
First Github Actions build.yml attempt Added updated scripts. new files
This commit is contained in:
parent
aad2df05ed
commit
3c5388fe4e
17
src/step 5 ooni list/bird2_converter.py
Normal file
17
src/step 5 ooni list/bird2_converter.py
Normal file
@ -0,0 +1,17 @@
|
||||
# Convert the summarized prefix list into BIRD2 static "reject" routes.
#
# Input : one prefix per line in ``final_file_path``.
# Output: one ``route <prefix> reject;`` statement per prefix, written to
#         ``final_output_file_path`` (statements separated by newlines).
final_file_path = 'sum/output/ipsum.lst'
final_output_file_path = 'sum/output/formatted_routes.lst'


def format_routes(lines):
    """Return one ``route <prefix> reject;`` statement per non-blank line.

    Args:
        lines: Iterable of strings, one prefix per entry. Surrounding
            whitespace (including the trailing newline) is stripped.

    Returns:
        A list of route statements in input order. Blank or
        whitespace-only entries are skipped, because they would otherwise
        produce a ``route  reject;`` statement with no prefix in it.
    """
    prefixes = (line.strip() for line in lines)
    return [f"route {prefix} reject;" for prefix in prefixes if prefix]


def main():
    """Read the prefix list and write the formatted route statements."""
    with open(final_file_path, 'r', encoding='utf-8') as file:
        final_formatted_routes = format_routes(file)

    with open(final_output_file_path, 'w', encoding='utf-8') as output_file:
        output_file.write('\n'.join(final_formatted_routes))


# Guarded like the sibling scripts so importing the module has no side effects.
if __name__ == "__main__":
    main()
|
||||
@ -1,23 +1,12 @@
|
||||
import logging
|
||||
from idna import encode as idna_encode
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(level=logging.DEBUG, # Set the lowest level to capture all logs
|
||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||
handlers=[
|
||||
logging.FileHandler("domain_processing.log", mode='a'),
|
||||
logging.StreamHandler() # This will print logs to console as well
|
||||
])
|
||||
|
||||
# Function to read domains from a file
|
||||
def read_domains_from_file(file_path):
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
domains = [line.strip() for line in f.readlines() if line.strip()]
|
||||
logging.info(f"Read {len(domains)} domains from {file_path}.")
|
||||
return domains
|
||||
except FileNotFoundError as e:
|
||||
logging.error(f"File not found: {file_path}, {e}")
|
||||
except FileNotFoundError:
|
||||
return []
|
||||
|
||||
# Function to convert domains to punycode
|
||||
@ -27,8 +16,8 @@ def convert_to_punycode(domains):
|
||||
try:
|
||||
punycode_domain = idna_encode(domain).decode('utf-8')
|
||||
punycode_domains.add(punycode_domain)
|
||||
except Exception as e:
|
||||
logging.error(f"Punycode conversion failed for domain {domain}: {e}")
|
||||
except Exception:
|
||||
pass
|
||||
return punycode_domains
|
||||
|
||||
# Main function to process domain files and create the output file
|
||||
@ -50,9 +39,8 @@ def main():
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
for domain in sorted(unique_domains):
|
||||
f.write(f"{domain}\n")
|
||||
logging.info(f"Written {len(unique_domains)} unique domains to {output_file}.")
|
||||
except Exception as e:
|
||||
logging.error(f"Error writing to file {output_file}: {e}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@ -1,44 +0,0 @@
|
||||
import logging
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(
    level=logging.DEBUG,  # lowest level, so every record is captured
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        # Opened in append mode, so records from earlier runs are kept.
        logging.FileHandler("ip_processing.log", mode='a'),
        # Mirrors every record to the console as well.
        logging.StreamHandler(),
    ],
)
|
||||
|
||||
# Function to read IPs from a file
|
||||
def read_ips_from_file(file_path):
    """Load the non-blank, whitespace-stripped lines of a UTF-8 text file.

    Args:
        file_path: Path of the list file to read.

    Returns:
        The stripped lines in file order. When the file does not exist,
        an error is logged and an empty list is returned.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # Strip every line, then drop the ones that end up empty.
            ips = list(filter(None, map(str.strip, f)))
    except FileNotFoundError as e:
        logging.error(f"File not found: {file_path}, {e}")
        return []
    logging.info(f"Read {len(ips)} IPs from {file_path}.")
    return ips
|
||||
|
||||
# Main function to process IP files and create the output file
|
||||
def main():
    """Merge the four input IP lists into a de-duplicated ``ips_all.lst``.

    Each input is read with ``read_ips_from_file``. The union of all
    entries is written one per line, ordered by plain string sorting.
    A failure while writing is logged rather than raised.
    """
    # The four source lists, read in this order.
    source_paths = (
        "input/ip.lst",
        "input/ip_ooni.lst",
        "input/ip_community.lst",
        "input/discord_ips.lst",
    )

    # A set collapses entries that appear in more than one list.
    unique_ips = set()
    for path in source_paths:
        unique_ips.update(read_ips_from_file(path))

    output_file = "ips_all.lst"
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.writelines(f"{ip}\n" for ip in sorted(unique_ips))
        logging.info(f"Written {len(unique_ips)} unique IPs to {output_file}.")
    except Exception as e:
        logging.error(f"Error writing to file {output_file}: {e}")


if __name__ == "__main__":
    main()
|
||||
31
src/step 5 ooni list/ip_4files_sum.py
Normal file
31
src/step 5 ooni list/ip_4files_sum.py
Normal file
@ -0,0 +1,31 @@
|
||||
# Function to read IPs from a file
|
||||
def read_ips_from_file(file_path):
    """Return the non-blank lines of a UTF-8 text file, whitespace-stripped.

    Args:
        file_path: Path of the list file to read.

    Returns:
        The stripped lines in file order, or an empty list when the file
        does not exist (a missing input is treated as "no entries").
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            stripped = (raw.strip() for raw in f)
            return [entry for entry in stripped if entry]
    except FileNotFoundError:
        return []
|
||||
|
||||
# Main function to process IP files and create the output file
|
||||
def main():
    """Merge the four input IP lists into ``sum/input/ips_all.lst``.

    Each input is read with ``read_ips_from_file``. The union of all
    entries is written one per line, ordered by plain string sorting.
    Writing stays best-effort: an I/O failure does not raise, but it is
    reported on stderr instead of being discarded silently, so a stale or
    missing output file can be traced in the job log.
    """
    import sys  # local import: only needed for the error report below

    # The four source lists, read in this order.
    source_paths = (
        "sum/input/ip.lst",
        "sum/input/ooni_ips.lst",
        "sum/input/ip_community.lst",
        "sum/input/discord_ips.lst",
    )

    # A set collapses entries that appear in more than one list.
    unique_ips = set()
    for path in source_paths:
        unique_ips.update(read_ips_from_file(path))

    output_file = "sum/input/ips_all.lst"
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.writelines(f"{ip}\n" for ip in sorted(unique_ips))
    except OSError as e:
        print(f"Error writing to file {output_file}: {e}", file=sys.stderr)


if __name__ == "__main__":
    main()
|
||||
@ -4,25 +4,42 @@ import logging
|
||||
import requests
|
||||
import ipaddress
|
||||
import time
|
||||
import os
|
||||
from collections import defaultdict
|
||||
from idna import encode as idna_encode
|
||||
|
||||
# Paths to input files
|
||||
IP_LST_PATH = "input/ips_all.lst"
|
||||
DOMAINS_LST_PATH = "input/domains_all.lst"
|
||||
OUTPUT_FILE = "output/ipsum.lst"
|
||||
IP_LST_PATH = 'sum/input/ips_all.lst'
|
||||
DOMAINS_LST_PATH = 'sum/output/domains_all.lst'
|
||||
OUTPUT_FILE = 'sum/output/ipsum.lst'
|
||||
|
||||
# Path to the GeoLite2 ASN database
|
||||
GEOIP_DB_PATH = "GeoLite2-ASN.mmdb"
|
||||
GEOIP_DB_PATH = 'sum/GeoLite2-ASN.mmdb'
|
||||
GEOIP_DB_URL = 'https://git.io/GeoLite2-ASN.mmdb'
|
||||
|
||||
# Function to download the GeoLite2 ASN database
|
||||
def download_geolite2_asn_db():
    """Download the GeoLite2 ASN database to ``GEOIP_DB_PATH`` if absent.

    Does nothing when a file already exists at ``GEOIP_DB_PATH``.
    Otherwise the database is fetched from ``GEOIP_DB_URL`` and written
    to that path.

    Raises:
        requests.RequestException: If the request fails, times out, or
            returns an HTTP error status. The error is logged first and
            then re-raised.
    """
    if os.path.exists(GEOIP_DB_PATH):
        return

    try:
        # requests applies no timeout by default, so a stalled server would
        # block the job indefinitely. Values are (connect, read) in seconds.
        response = requests.get(GEOIP_DB_URL, timeout=(10, 60))
        response.raise_for_status()
        with open(GEOIP_DB_PATH, 'wb') as f:
            f.write(response.content)
        logging.info(f'Downloaded GeoLite2 ASN database to {GEOIP_DB_PATH}')
    except requests.RequestException as e:
        logging.error(f'Failed to download GeoLite2 ASN database: {e}')
        raise
|
||||
|
||||
# Initialize the GeoIP2 reader
|
||||
reader = geoip2.database.Reader(GEOIP_DB_PATH)
|
||||
def initialize_geoip_reader():
    """Make sure the ASN database is present locally, then open it.

    Returns:
        A ``geoip2.database.Reader`` opened on ``GEOIP_DB_PATH``.
    """
    # Fetch the database first; the reader needs the file on disk.
    download_geolite2_asn_db()
    asn_reader = geoip2.database.Reader(GEOIP_DB_PATH)
    return asn_reader
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(level=logging.DEBUG,
|
||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||
format='%(asctime)s - %(levelname)s - %(message)s',
|
||||
handlers=[
|
||||
logging.FileHandler("summary.log", mode='a'),
|
||||
logging.FileHandler('summary.log', mode='a'),
|
||||
logging.StreamHandler()
|
||||
])
|
||||
|
||||
@ -46,14 +63,14 @@ COMPANY_DOMAINS = {
|
||||
|
||||
# Local IP CIDRs to exclude
|
||||
LOCAL_IP_CIDRS = [
|
||||
ipaddress.ip_network("127.0.0.0/8"),
|
||||
ipaddress.ip_network("10.0.0.0/8"),
|
||||
ipaddress.ip_network("172.16.0.0/12"),
|
||||
ipaddress.ip_network("192.168.0.0/16"),
|
||||
ipaddress.ip_network("169.254.0.0/16"),
|
||||
ipaddress.ip_network("::1/128"),
|
||||
ipaddress.ip_network("fc00::/7"),
|
||||
ipaddress.ip_network("fe80::/10")
|
||||
ipaddress.ip_network('127.0.0.0/8'),
|
||||
ipaddress.ip_network('10.0.0.0/8'),
|
||||
ipaddress.ip_network('172.16.0.0/12'),
|
||||
ipaddress.ip_network('192.168.0.0/16'),
|
||||
ipaddress.ip_network('169.254.0.0/16'),
|
||||
ipaddress.ip_network('::1/128'),
|
||||
ipaddress.ip_network('fc00::/7'),
|
||||
ipaddress.ip_network('fe80::/10')
|
||||
]
|
||||
|
||||
# Function to summarize IPs into /28 subnets at most
|
||||
@ -71,22 +88,22 @@ def summarize_ips(ips):
|
||||
else:
|
||||
summarized_networks.append(network)
|
||||
|
||||
logging.info(f"Summarized networks: {summarized_networks}")
|
||||
logging.info(f'Summarized networks: {summarized_networks}')
|
||||
return summarized_networks
|
||||
except ValueError as e:
|
||||
logging.error(f"Error summarizing IPs: {e}")
|
||||
logging.error(f'Error summarizing IPs: {e}')
|
||||
return []
|
||||
|
||||
# Function to handle rate-limiting errors (429) and retry after waiting
|
||||
def handle_rate_limit():
|
||||
wait_time = 60 # Wait time of 60 seconds
|
||||
logging.warning(f"Rate limit hit. Waiting for {wait_time} seconds.")
|
||||
logging.warning(f'Rate limit hit. Waiting for {wait_time} seconds.')
|
||||
time.sleep(wait_time)
|
||||
|
||||
# Function to get CIDRs for a domain from ASN using GeoLite2
|
||||
def get_cidr_for_asn(asn):
|
||||
try:
|
||||
url = f"https://api.bgpview.io/asn/{asn}/prefixes"
|
||||
url = f'https://api.bgpview.io/asn/{asn}/prefixes'
|
||||
response = requests.get(url)
|
||||
|
||||
if response.status_code == 200:
|
||||
@ -98,12 +115,12 @@ def get_cidr_for_asn(asn):
|
||||
return get_cidr_for_asn(asn) # Retry after waiting
|
||||
|
||||
elif response.status_code == 403:
|
||||
logging.error(f"Access forbidden for ASN {asn}, skipping.")
|
||||
logging.error(f'Access forbidden for ASN {asn}, skipping.')
|
||||
return []
|
||||
|
||||
return []
|
||||
except Exception as e:
|
||||
logging.error(f"Error retrieving CIDRs for ASN {asn}: {e}")
|
||||
logging.error(f'Error retrieving CIDRs for ASN {asn}: {e}')
|
||||
return []
|
||||
|
||||
# Function to resolve a domain with retries and punycode support
|
||||
@ -112,15 +129,16 @@ def resolve_domain(domain):
|
||||
domain_punycode = idna_encode(domain).decode('utf-8')
|
||||
return socket.gethostbyname_ex(domain_punycode)[2]
|
||||
except Exception as e:
|
||||
logging.error(f"Could not resolve domain {domain}: {e}")
|
||||
logging.error(f'Could not resolve domain {domain}: {e}')
|
||||
return []
|
||||
|
||||
# Function to check if a domain matches COMPANY_DOMAINS and fetch CIDRs
|
||||
def process_domain_for_asn(domain):
|
||||
def process_domain_for_asn(domain, processed_asns):
|
||||
asns = COMPANY_DOMAINS.get(domain, [])
|
||||
cidrs = set()
|
||||
if asns:
|
||||
for asn in asns:
|
||||
for asn in asns:
|
||||
if asn not in processed_asns:
|
||||
processed_asns.add(asn)
|
||||
cidrs.update(get_cidr_for_asn(asn))
|
||||
return cidrs
|
||||
|
||||
@ -130,7 +148,7 @@ def read_ips_from_file(file_path):
|
||||
with open(file_path, 'r') as f:
|
||||
return [line.strip() for line in f.readlines() if line.strip()]
|
||||
except FileNotFoundError:
|
||||
logging.error(f"File not found: {file_path}")
|
||||
logging.error(f'File not found: {file_path}')
|
||||
return []
|
||||
|
||||
# Function to check if an IP is local
|
||||
@ -141,7 +159,7 @@ def is_local_ip(ip):
|
||||
if ip_obj.version == cidr.version and ip_obj.subnet_of(cidr):
|
||||
return True
|
||||
except ValueError as e:
|
||||
logging.error(f"Invalid IP or CIDR: {ip}: {e}")
|
||||
logging.error(f'Invalid IP or CIDR: {ip}: {e}')
|
||||
return False
|
||||
|
||||
# Function to write summarized CIDRs to ipsum.lst
|
||||
@ -149,13 +167,16 @@ def write_summarized_ips(ips, filename):
|
||||
try:
|
||||
with open(filename, 'w') as f:
|
||||
for cidr in ips:
|
||||
f.write(f"{cidr}\n")
|
||||
logging.info(f"Written summarized IPs to {filename}")
|
||||
f.write(f'{cidr}\n')
|
||||
logging.info(f'Written summarized IPs to {filename}')
|
||||
except Exception as e:
|
||||
logging.error(f"Error writing summarized IPs to file: {e}")
|
||||
logging.error(f'Error writing summarized IPs to file: {e}')
|
||||
|
||||
# Main function to process ip.lst, summarize, and add CIDRs for company domains
|
||||
def main():
|
||||
# Initialize the GeoIP2 reader
|
||||
reader = initialize_geoip_reader()
|
||||
|
||||
# Read IPs from ip.lst
|
||||
ips = read_ips_from_file(IP_LST_PATH)
|
||||
|
||||
@ -168,9 +189,10 @@ def main():
|
||||
# Check domains.lst for COMPANY_DOMAINS matches and get corresponding CIDRs
|
||||
domains = read_ips_from_file(DOMAINS_LST_PATH)
|
||||
company_cidrs = set()
|
||||
processed_asns = set()
|
||||
|
||||
for domain in domains:
|
||||
company_cidrs.update(process_domain_for_asn(domain))
|
||||
company_cidrs.update(process_domain_for_asn(domain, processed_asns))
|
||||
|
||||
# Combine summarized IPs and company CIDRs
|
||||
final_cidrs = set(summarized_ips) | company_cidrs
|
||||
@ -178,5 +200,5 @@ def main():
|
||||
# Write the final output to ipsum.lst
|
||||
write_summarized_ips(final_cidrs, OUTPUT_FILE)
|
||||
|
||||
if __name__ == "__main__":
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@ -17,10 +17,10 @@ def normalize_domain(domain):
|
||||
# Function to fetch and process OONI domains with logging and anomaly checks
|
||||
def fetch_and_process_ooni_domains(output_file):
|
||||
try:
|
||||
# Calculate the date range for the last 7 days
|
||||
# Calculate the date range for the last 14 days
|
||||
today = datetime.now()
|
||||
until_date = today.strftime('%Y-%m-%d')
|
||||
since_date = (today - timedelta(days=7)).strftime('%Y-%m-%d')
|
||||
since_date = (today - timedelta(days=14)).strftime('%Y-%m-%d')
|
||||
|
||||
# Construct the URL for downloading the CSV file using the OONI API
|
||||
base_url = "https://api.ooni.io/api/v1/aggregation"
|
||||
@ -58,9 +58,9 @@ def fetch_and_process_ooni_domains(output_file):
|
||||
# Log domain processing details
|
||||
logging.info(f"Checking domain: {domain} | Anomalies: {anomaly_count}, OK: {ok_count}, Anomaly Rate: {anomaly_count / (anomaly_count + ok_count) if (anomaly_count + ok_count) > 0 else 0:.2f}")
|
||||
|
||||
# Filter out incorrect domains
|
||||
if re.match(pattern, domain):
|
||||
logging.info(f"Domain has incorrect format: {domain}")
|
||||
# Filter out incorrect domains and yandex domains
|
||||
if re.match(pattern, domain) or domain.endswith('yandex.net') or domain.endswith('yandex.ru'):
|
||||
logging.info(f"Domain is either incorrectly formatted or a Yandex domain: {domain}")
|
||||
continue
|
||||
|
||||
# Log and process based on anomaly vs OK count
|
||||
@ -83,7 +83,7 @@ def fetch_and_process_ooni_domains(output_file):
|
||||
logging.error(f"Error occurred during fetching or processing: {e}")
|
||||
|
||||
# Replace with your output file path
|
||||
output_file = 'ooni/ooni_domains.lst'
|
||||
output_file = 'sum/input/ooni_domains.lst'
|
||||
|
||||
# Fetch and process OONI domains, and output to the specified file
|
||||
fetch_and_process_ooni_domains(output_file)
|
||||
fetch_and_process_ooni_domains(output_file)
|
||||
@ -1,5 +1,4 @@
|
||||
import socket
|
||||
import logging
|
||||
import concurrent.futures
|
||||
import threading
|
||||
import gc
|
||||
@ -9,20 +8,6 @@ import ipaddress
|
||||
from idna import encode as idna_encode
|
||||
from queue import Queue
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(level=logging.DEBUG, # Set the lowest level to capture all logs
|
||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||
handlers=[
|
||||
logging.FileHandler("general_comm.log", mode='a'),
|
||||
logging.StreamHandler() # This will print logs to console as well
|
||||
])
|
||||
|
||||
# Additional error logging handler
|
||||
error_logger = logging.getLogger("error")
|
||||
error_handler = logging.FileHandler("error_comm.log", mode='a')
|
||||
error_handler.setLevel(logging.ERROR)
|
||||
error_logger.addHandler(error_handler)
|
||||
|
||||
# Lock for writing to the output file in a thread-safe way
|
||||
file_write_lock = threading.Lock()
|
||||
|
||||
@ -35,17 +20,15 @@ def resolve_domain(domain, max_retries=2):
|
||||
# Convert to punycode if necessary
|
||||
try:
|
||||
domain = idna_encode(domain).decode('utf-8')
|
||||
except Exception as e:
|
||||
error_logger.error(f"Punycode conversion failed for domain {domain}: {e}")
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
for _ in range(max_retries):
|
||||
try:
|
||||
ip_list = socket.gethostbyname_ex(domain)[2]
|
||||
ip_set.update(ip_list)
|
||||
logging.info(f"Resolved {domain} to IPs: {ip_list}")
|
||||
except socket.gaierror as e:
|
||||
error_logger.error(f"Could not resolve domain {domain}: {e}")
|
||||
except socket.gaierror:
|
||||
pass
|
||||
return list(ip_set)
|
||||
|
||||
# Function to check if IP is already covered by an existing CIDR
|
||||
@ -55,8 +38,8 @@ def is_ip_in_existing_cidr(ip, cidrs):
|
||||
for cidr in cidrs:
|
||||
if ip_obj in ipaddress.ip_network(cidr, strict=False):
|
||||
return True
|
||||
except ValueError as e:
|
||||
error_logger.error(f"Invalid IP or CIDR: {ip} - {cidr}: {e}")
|
||||
except ValueError:
|
||||
pass
|
||||
return False
|
||||
|
||||
# Function to get all CIDRs for a domain by resolving its IP addresses
|
||||
@ -68,19 +51,16 @@ def process_domain(domain, existing_cidrs):
|
||||
if not is_ip_in_existing_cidr(ip, existing_cidrs):
|
||||
cidrs.add(f"{ip}/32")
|
||||
return cidrs
|
||||
except Exception as e:
|
||||
error_logger.error(f"Error processing domain {domain}: {e}")
|
||||
except Exception:
|
||||
return set()
|
||||
|
||||
# Function to read domains from domains.lst file
|
||||
def read_domains_from_file(file_path="community.lst"):
|
||||
def read_domains_from_file(file_path="sum/input/community.lst"):
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
domains = [line.strip() for line in f.readlines() if line.strip()]
|
||||
logging.info(f"Read {len(domains)} domains from file.")
|
||||
return domains
|
||||
except FileNotFoundError as e:
|
||||
error_logger.error(f"File not found: {file_path}, {e}")
|
||||
except FileNotFoundError:
|
||||
return []
|
||||
|
||||
# Function to write CIDRs in batches to output file in a thread-safe way
|
||||
@ -93,7 +73,6 @@ def write_cidrs_to_file(filename="ip_community.lst"):
|
||||
with open(filename, 'a', encoding='utf-8') as f:
|
||||
for cidr in cidrs:
|
||||
f.write(f"{cidr}\n")
|
||||
logging.info(f"Written {len(cidrs)} CIDRs to {filename}")
|
||||
results_queue.task_done()
|
||||
|
||||
# Multithreading to handle large domain lists efficiently
|
||||
@ -102,19 +81,18 @@ def main():
|
||||
gc.enable()
|
||||
|
||||
# Read the domains from domains.lst file
|
||||
domains = read_domains_from_file("community.lst")
|
||||
domains = read_domains_from_file("sum/input/community.lst")
|
||||
if not domains:
|
||||
logging.info("No domains to process.")
|
||||
return
|
||||
|
||||
existing_cidrs = set() # Keep track of all CIDRs to exclude matching IPs
|
||||
|
||||
# Start the file writer thread
|
||||
writer_thread = threading.Thread(target=write_cidrs_to_file, args=("ip_community.lst",))
|
||||
writer_thread = threading.Thread(target=write_cidrs_to_file, args=("sum/input/ip_community.lst",))
|
||||
writer_thread.start()
|
||||
|
||||
# Use ThreadPoolExecutor to use more threads (set to 16 threads for better utilization)
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=35) as executor:
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
|
||||
future_to_domain = {executor.submit(process_domain, domain, existing_cidrs): domain for domain in domains}
|
||||
|
||||
for future in concurrent.futures.as_completed(future_to_domain):
|
||||
@ -123,8 +101,8 @@ def main():
|
||||
domain_cidrs = future.result()
|
||||
if domain_cidrs:
|
||||
results_queue.put(domain_cidrs)
|
||||
except Exception as e:
|
||||
error_logger.error(f"Error with domain {domain}: {e}")
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
# Collect garbage after each domain processing to free memory
|
||||
gc.collect()
|
||||
|
||||
@ -74,7 +74,7 @@ def process_domain(domain, existing_cidrs):
|
||||
return set()
|
||||
|
||||
# Function to read domains from domains.lst file
|
||||
def read_domains_from_file(file_path="ooni_domains.lst"):
|
||||
def read_domains_from_file(file_path="input/ooni_domains.lst"):
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
domains = [line.strip() for line in f.readlines() if line.strip()]
|
||||
@ -85,7 +85,7 @@ def read_domains_from_file(file_path="ooni_domains.lst"):
|
||||
return []
|
||||
|
||||
# Function to write CIDRs in batches to output file in a thread-safe way
|
||||
def write_cidrs_to_file(filename="ip.lst"):
|
||||
def write_cidrs_to_file(filename="ooni_ips_temp.lst"):
|
||||
while True:
|
||||
cidrs = results_queue.get() # Fetch CIDRs from the queue
|
||||
if cidrs is None: # Sentinel value to stop the thread
|
||||
@ -103,7 +103,7 @@ def main():
|
||||
gc.enable()
|
||||
|
||||
# Read the domains from domains.lst file
|
||||
domains = read_domains_from_file("ooni_domains.lst")
|
||||
domains = read_domains_from_file("input/ooni_domains.lst")
|
||||
if not domains:
|
||||
logging.info("No domains to process.")
|
||||
return
|
||||
@ -111,7 +111,7 @@ def main():
|
||||
existing_cidrs = set() # Keep track of all CIDRs to exclude matching IPs
|
||||
|
||||
# Start the file writer thread
|
||||
writer_thread = threading.Thread(target=write_cidrs_to_file, args=("ip_ooni.lst",))
|
||||
writer_thread = threading.Thread(target=write_cidrs_to_file, args=("input/ooni_ips.lst",))
|
||||
writer_thread.start()
|
||||
|
||||
# Use ThreadPoolExecutor to use more threads (set to 16 threads for better utilization)
|
||||
|
||||
388
sum/input/community.lst
Normal file
388
sum/input/community.lst
Normal file
@ -0,0 +1,388 @@
|
||||
4pda.ru
|
||||
4pda.to
|
||||
abook-club.ru
|
||||
adguard.com
|
||||
afcs.dellcdn.com
|
||||
aftermarket.schaeffler.com
|
||||
aftermarket.zf.com
|
||||
alberta.ca
|
||||
anilibria.tv
|
||||
antizapret.prostovpn.org
|
||||
api.app.prod.grazie.aws.intellij.net
|
||||
api.github.com
|
||||
auth.openai.com
|
||||
ai.com
|
||||
adobe.com
|
||||
adobeereg.com
|
||||
adobelogin.com
|
||||
api.githubcopilot.com
|
||||
api.home-connect.com
|
||||
protonmail.ch
|
||||
api.radarr.video
|
||||
api.themoviedb.org
|
||||
aplawrence.com
|
||||
app.m3u.in
|
||||
app.paraswap.io
|
||||
app.zerossl.com
|
||||
appstorrent.ru
|
||||
apt.releases.hashicorp.com
|
||||
aqicn.org
|
||||
arc.intel.com
|
||||
artifacts.elastic.co
|
||||
atlassian.com
|
||||
auth.grazie.ai
|
||||
auth0.openai.com
|
||||
bard.google.com
|
||||
bbci.co.uk
|
||||
bitbucket.org
|
||||
bitcoin.org
|
||||
bitru.org
|
||||
booktracker.org
|
||||
boosteroid.com
|
||||
bosch-home.com
|
||||
bradyid.com
|
||||
builds.parsec.app
|
||||
bungiestore.com
|
||||
feed.rutracker.cc
|
||||
canva.com
|
||||
cdn.id.supercell.com
|
||||
cdn.oaistatic.com
|
||||
certifytheweb.com
|
||||
chatgpt.com
|
||||
checkout.buckaroo.nl
|
||||
chess.com
|
||||
cisco.com
|
||||
cdninstagram.com
|
||||
admin.instagram.com
|
||||
api.instagram.com
|
||||
autodiscover.instagram.com
|
||||
black.instagram.com
|
||||
blog.business.instagram.com
|
||||
blog.instagram.com
|
||||
business.instagram.com
|
||||
community.instagram.com
|
||||
developers.instagram.com
|
||||
help.instagram.com
|
||||
hyperlapse.instagram.com
|
||||
instagram.com
|
||||
i.instagram.com
|
||||
images.instagram.com
|
||||
l.instagram.com
|
||||
maps.instagram.com
|
||||
upload.instagram.com
|
||||
white.instagram.com
|
||||
citrix.com
|
||||
clamav.net
|
||||
cloud.mongodb.com
|
||||
claude.ai
|
||||
cloudflare-dns.com
|
||||
cloudflare-ech.com
|
||||
colta.ru
|
||||
cdn.betterttv.net
|
||||
chaturbate.com
|
||||
community.cisco.com
|
||||
copilot-proxy.githubusercontent.com
|
||||
crunchyroll.com
|
||||
czx.to
|
||||
d.docs.live.net
|
||||
database.clamav.net
|
||||
deezer.com
|
||||
tailscale.com
|
||||
pkgs.tailscale.com
|
||||
dl.tailscale.com
|
||||
dell.com
|
||||
deviantart.com
|
||||
devops.com
|
||||
digitalocean.com
|
||||
dist.torproject.org
|
||||
www.torproject.org
|
||||
docker.elastic.co
|
||||
docs.redis.com
|
||||
documentation.meraki.com
|
||||
download.jetbrains.com
|
||||
download.lenovo.com
|
||||
download.screamingfrog.co.uk
|
||||
download.wetransfer.com
|
||||
discord.gg
|
||||
discord.com
|
||||
dis.gd
|
||||
discord.media
|
||||
discord-attachments-uploads-prd.storage.googleapis.com
|
||||
dis.gd
|
||||
discord.co
|
||||
discord.com
|
||||
discord.design
|
||||
discord.dev
|
||||
discord.gg
|
||||
discord.gift
|
||||
discord.gifts
|
||||
discord.media
|
||||
discord.new
|
||||
discord.store
|
||||
discord.tools
|
||||
discordapp.com
|
||||
discordapp.net
|
||||
www.discord.com
|
||||
status.discord.com
|
||||
media.discordapp.net
|
||||
discordmerch.com
|
||||
discordpartygames.com
|
||||
discord-activities.com
|
||||
discordactivities.com
|
||||
discordsays.com
|
||||
discordstatus.com
|
||||
discord.com
|
||||
discord.gg
|
||||
discordapp.com
|
||||
discord-attachments-uploads-prd.storage.googleapis.com
|
||||
dis.gd
|
||||
discordstatus.com
|
||||
discord.media
|
||||
discordapp.net
|
||||
discordcdn.com
|
||||
discord.dev
|
||||
discord.new
|
||||
discord.gift
|
||||
discord.co
|
||||
dw.com
|
||||
edge.microsoft.com
|
||||
edgeservices.bing.com
|
||||
ej.ru
|
||||
quora.com
|
||||
etsy.com
|
||||
event.on24.com
|
||||
exler.ru
|
||||
e621.net
|
||||
static1.e621.net
|
||||
fex.net
|
||||
files.oaiusercontent.com
|
||||
filmix.biz
|
||||
firefly-ps.adobe.io
|
||||
flibusta.is
|
||||
fluke.com
|
||||
fork.pet
|
||||
forklog.com
|
||||
formula1.com
|
||||
forum.voynaplemyon.com
|
||||
fw-download.ubnt.com
|
||||
fw-update.ubnt.com
|
||||
gallery.zetalliance.org
|
||||
game.brawlstarsgame.com
|
||||
geni.us
|
||||
genius.com
|
||||
gitlab.io
|
||||
gllto.glpals.com
|
||||
disneyplus.com
|
||||
max.com
|
||||
music.youtube.com
|
||||
hbomax.com
|
||||
primevideo.com
|
||||
gnome-look.org
|
||||
googlevideo.com
|
||||
ytimg.com
|
||||
ggpht.com
|
||||
youtubekids.com
|
||||
yt.be
|
||||
gvt1.com
|
||||
youtube-nocookie.com
|
||||
youtube-ui.l.google.com
|
||||
youtubeembeddedplayer.googleapis.com
|
||||
youtube.googleapis.com
|
||||
youtubei.googleapis.com
|
||||
yt-video-upload.l.google.com
|
||||
wide-youtube.l.google.com
|
||||
yt3.ggpht.com
|
||||
googletagmanager.com
|
||||
gordonua.com
|
||||
grammarly.com
|
||||
grazie.ai
|
||||
g2a.com
|
||||
habr.com
|
||||
rezka.ag
|
||||
hdrezka.ag
|
||||
helm.releases.hashicorp.com
|
||||
hyperhost.ua
|
||||
ichef.bbci.co.uk
|
||||
id.cisco.com
|
||||
iichan.hk
|
||||
image.tmdb.org
|
||||
intel.com
|
||||
ipburger.com
|
||||
island-of-pleasure.site
|
||||
kemono.party
|
||||
kino.pub
|
||||
kpapp.link
|
||||
lenovo.com
|
||||
lib.rus.ec
|
||||
libgen.rs
|
||||
linuxiac.com
|
||||
localbitcoins.com
|
||||
login.amd.com
|
||||
lostfilm.tv
|
||||
m.strava.com
|
||||
macpaw.com
|
||||
macvendors.com
|
||||
malwarebytes.com
|
||||
master.qt.io
|
||||
mattermost.com
|
||||
medium.com
|
||||
megapeer.ru
|
||||
memohrc.org
|
||||
metacritic.com
|
||||
meteo.paraplan.net
|
||||
mobile.events.data.microsoft.com
|
||||
monster.ie
|
||||
mouser.com
|
||||
mrakopedia.net
|
||||
myworld-portal.leica-geosystems.com
|
||||
netflix.com
|
||||
nih.gov
|
||||
nordvpn.com
|
||||
nordaccount.com
|
||||
nordcdn.com
|
||||
notion.com
|
||||
ntc.party
|
||||
nyaa.si
|
||||
nyaa.tracker.wf
|
||||
oaistatic.com
|
||||
oasis.app
|
||||
omv-extras.org
|
||||
onlineradiobox.com
|
||||
onlinesim.ru
|
||||
openai.com
|
||||
chat.openai.com
|
||||
api.openai.com
|
||||
openwrt.wk.cz
|
||||
openai.com.cdn.cloudflare.net
|
||||
openaicomproductionae4b.blob.core.windows.net
|
||||
openaicom-api-bdcpf8c6d2e9atf6.z01.azurefd.net
|
||||
openaiapi-site.azureedge.net
|
||||
os.mbed.com
|
||||
packages.gitlab.com
|
||||
pandasecurity.com
|
||||
path3.xtracloud.net
|
||||
patreon.com
|
||||
pay.buckaroo.nl
|
||||
paypal.com
|
||||
pb.wtf
|
||||
pcbway.com
|
||||
pcbway.ru
|
||||
php.su
|
||||
piccy.info
|
||||
pixiv.net
|
||||
production-openaicom-storage.azureedge.net
|
||||
proxyscrape.com
|
||||
plab.site
|
||||
platform.twitter.com
|
||||
plugins.jetbrains.com
|
||||
polit.ru
|
||||
pornolab.net
|
||||
pornhub.com
|
||||
portal.bgpmon.net
|
||||
protonmail.com
|
||||
quora.com
|
||||
radarr.servarr.com
|
||||
raw-data.gitlab.io
|
||||
redis.com
|
||||
redtube.com
|
||||
refactoring.guru
|
||||
refinitiv.com
|
||||
registry.terraform.io
|
||||
releases.hashicorp.com
|
||||
repo.mongodb.org
|
||||
republic.ru
|
||||
rutor.info
|
||||
rutor.is
|
||||
resp.app
|
||||
rpm.grafana.com
|
||||
ru.depositphotos.com
|
||||
stream.voidboost.cc
|
||||
fingerium.stream.voidboost.cc
|
||||
ununbium.stream.voidboost.cc
|
||||
atomic.stream.voidboost.cc
|
||||
ranger.stream.voidboost.cc
|
||||
ru.iherb.com
|
||||
rus.delfi.lv
|
||||
saverudata.info
|
||||
searchfloor.org
|
||||
sebeanus.online
|
||||
secure-web.cisco.com
|
||||
seedoff.zannn.top
|
||||
semnasem.org
|
||||
software-static.download.prss.microsoft.com
|
||||
sovanews.tv
|
||||
sentry.io
|
||||
stockx.com
|
||||
serpstat.com
|
||||
singlekey-id.com
|
||||
skladchik.com
|
||||
skyscanner.com
|
||||
slideshare.net
|
||||
soapui.org
|
||||
software.cisco.com
|
||||
soundcloud.com
|
||||
spotify.com
|
||||
streamable.com
|
||||
st.kinovod.net
|
||||
static.files.bbci.co.uk
|
||||
static.rutracker.cc
|
||||
strava.com
|
||||
stulchik.net
|
||||
support.cambiumnetworks.com
|
||||
support.huawei.com
|
||||
support.ruckuswireless.com
|
||||
swagger.io
|
||||
sysdig.com
|
||||
terraform.io
|
||||
themoviedb.org
|
||||
tidal.com
|
||||
tiktok.com
|
||||
timberland.com
|
||||
tmdb.org
|
||||
tokenized.play.google.com
|
||||
tools.cisco.com
|
||||
torrent.by
|
||||
tr.anidub.com
|
||||
tracker.opentrackr.org
|
||||
tutanota.com
|
||||
twimg.com
|
||||
abs-0.twimg.com
|
||||
ufile.io
|
||||
underver.se
|
||||
unfiltered.adguard-dns.com
|
||||
unian.net
|
||||
uniongang.tv
|
||||
vectorworks.net
|
||||
velocidrone.com
|
||||
veritas.com
|
||||
vipdrive.net
|
||||
vrv.co
|
||||
vyos.io
|
||||
watchguard.com
|
||||
weather.com
|
||||
weblance.com.ua
|
||||
welt.de
|
||||
wetransfer.com
|
||||
wheather.com
|
||||
windguru.cz
|
||||
wixmp.com
|
||||
youtube.com
|
||||
wunderground.com
|
||||
www.analog.com
|
||||
www.hetzner.com
|
||||
www.hrw.org
|
||||
www.jabra.com
|
||||
www.microchip.com
|
||||
www.postfix.org
|
||||
www.qualcomm.com
|
||||
www.smashwords.com
|
||||
www.stalker2.com
|
||||
www.support.xerox.com
|
||||
www.youtube.com
|
||||
www.ti.com
|
||||
www.wikiart.org
|
||||
www.wunderground.com
|
||||
xhamster.com
|
||||
xhamsterlive.com
|
||||
znanija.com
|
||||
zohomail.com
|
||||
2321
sum/input/discord_ips.lst
Normal file
2321
sum/input/discord_ips.lst
Normal file
File diff suppressed because it is too large
Load Diff
41872
sum/input/domains.lst
Normal file
41872
sum/input/domains.lst
Normal file
File diff suppressed because it is too large
Load Diff
71128
sum/input/ip.lst
Normal file
71128
sum/input/ip.lst
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user