For those who use http://malc0de.com/database/ and/or http://vxvault.siri-urz.net/ViriList.php as a malware sample provider, the scripts below will automatically download as many samples as you need from those sites. All you need is Python 2.7.
By default, samples are saved to c:\malware, but this can be changed using "-d new location"; the user agent used to download samples, the number of threads, etc. can also be changed.
Run script.py -h to list all available options.
- no warranty express or implied
- free to use as long as you don’t use it to make money
Malc0de downloader:
Warning: Downloaded files can harm your computer.
Warning: Downloaded files can harm your computer.
http://virii.tk/malc0de-com-samples-downloader-v3-3/
http://virii.tk/vx-vault-samples-downloader-v1-3/
By default, samples are saved to c:\malware, but this can be changed using "-d new location"; the user agent used to download samples, the number of threads, etc. can also be changed.
To list all available options:
Code: Select all
License:script.py -h
- no warranty express or implied
- free to use as long as you don’t use it to make money
Malc0de downloader:
Warning: Downloaded files can harm your computer.
Code: Select all
Vx Vault downloader:import re
import time
import urllib2
import hashlib
import os
import random
import Queue
import threading
import argparse
print """
Malc0de.com samples downloader v3.3
)\._.,--....,'``.
.b--. /; _.. \ _\ (`._ ,.
`=,-,-'~~~ `----(,_..'--(,_..'`-.;.'
http://virii.tk http://twitter.com/ViRiiTk
"""
parser = argparse.ArgumentParser(description="Malc0de.com samples downloader v3.3")
parser.add_argument("nr_samples", type=int,
help= "Number of samples you want to download")
parser.add_argument("-t", "--nr_threads", metavar="threads", type=int, default=200,
help= "Threads number (Default: 200)")
parser.add_argument("-a", "--agent", default="Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0",
help= "User Agent used to download samples")
parser.add_argument("-d", "--dldfolder", default = "C:\malware\\",
help= "Local folder to download samples (Default: C:\malware\\ )")
parser.add_argument("-i", "--info", default = "_files.txt",
help = "file to store info about downloaded samples (Default: _files.txt)")
parser.add_argument("-e", "--error", default = "_errors.txt",
help = "file to store errors (Default: _errors.txt)")
parser.add_argument("-u", "--malurl", default = "_mal_url.txt",
help = "file to store malware urls (Default: _mal_url.txt)")
args = parser.parse_args()
#create download folder if not exist
if not os.path.isdir(args.dldfolder):
os.mkdir(args.dldfolder)
#limit the number of download samples
if args.nr_samples > 10000:
print "You need very Very VERY many samples, 5k is enough for you"
args.nr_samples = 4999
if args.nr_threads >= args.nr_samples:
args.nr_threads = args.nr_samples
print "Try to download latest %i samples" %(args.nr_samples)
print "Threads: %i" %(args.nr_threads)
print "Malware samples will be downloaded to %s" %(args.dldfolder), "\n"
#construct user agents
dldagent = {'User-Agent' : args.agent}
useragent = { 'User-Agent' : 'Malc0de.com samples downloader v3.3 http://ViRii.Tk'}
#queue
q = Queue.Queue()
#generate random string
def get_random_word(a):
    """Return a random string of ``a`` characters (ASCII letters and digits).

    The result is used to make local file names unique.  It relies on the
    ``random`` module, so it must not be used for anything security-sensitive.

    a -- requested length; zero or a negative value yields an empty string.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    # join() builds the string in one pass instead of repeated concatenation.
    return ''.join(random.choice(alphabet) for _ in range(a))
#md5 file
def md5Checksum(filePath):
    """Return the hexadecimal MD5 digest of the file at ``filePath``.

    The file is read in 8192-byte chunks so that large samples are never held
    in memory in one piece.  The handle is closed when the function returns
    (the previous version left it open).

    filePath -- path of the file to hash.
    Raises IOError/OSError if the file cannot be opened or read.
    """
    m = hashlib.md5()
    with open(filePath, 'rb') as fh:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for data in iter(lambda: fh.read(8192), b''):
            m.update(data)
    return m.hexdigest()
# Number of listing pages that must be visited: nr_samples divided by 50,
# rounded up (presumably the site lists 50 samples per page - confirm).
counter = 0  # number of addresses queued so far; updated by getmalware()
pages = (args.nr_samples + 49) // 50
#find all malware address on curent page
def getmalware(pagina):
    """Queue the malware addresses found in one listing page.

    pagina -- HTML source of a listing page, as a string.

    A row is recognised as a ``<td>YYYY-MM-DD</td>`` cell followed by the next
    line; the address is the content of the tab-indented ``<td>`` cell on that
    line, with any ``<br/>`` markers removed.  Each address is put on the
    global queue ``q`` and the global ``counter`` is incremented; the function
    returns as soon as ``counter`` reaches ``args.nr_samples``.
    """
    global counter
    rows = re.findall(r"<td>[\d]{4}-[\d]{2}-[\d]{2}</td>\n.+\n", pagina)
    for row in rows:
        # Every row already starts with the date cell (findall matched it), so
        # only the address cell still has to be located.
        malware = re.search(r"\t<td>(.+)</td>", row)
        if not malware:
            continue
        if counter >= args.nr_samples:
            return
        q.put(malware.group(1).replace("<br/>", ""))
        counter += 1
#browsing pages
print "Browsing pages:"
for i in range(1, pages + 1):
adresa = "http://malc0de.com/database/?&page=" + str(i)
print "Searching on:", adresa
time.sleep(3) # pauza intre pagini (s)
try:
req = urllib2.Request(adresa, None, useragent)
response = urllib2.urlopen(req)
continut = response.read()
getmalware(continut)
except Exception as e:
print e
pass
def dld_mal(url_mal):
#downloading malware samples
#write address of this sample
with open(args.dldfolder + args.malurl, "a") as handle:
handle.write(url_mal + "\n")
handle.close()
#get file name
file_name = url_mal.split("/")[-1]
#remove bad characters from file name
if len(file_name)==0 or re.search("\?", file_name) or re.search("\&", file_name):
file_name = "No_name" + str(get_random_word(8))
#try to download sample
try:
#check if url start with "http://
if url_mal[:7] != "http://":
url_mal = "http://" + url_mal
#construct url and set timeout
url_construct = urllib2.Request(url_mal, None, dldagent)
u = urllib2.urlopen(url_construct, timeout = 59) #timeout 1 min
# every downloaded malware will have a uniq name: "Malware_sample" + "_" + 3 random characters
f_name = args.dldfolder + str(file_name) +"_" + get_random_word(3)
#write to file
f = open(f_name, 'wb')
block_sz = 8192
while True:
buffer = u.read(block_sz)
if not buffer:
break
f.write(buffer)
f.close()
#write info to _files.txt
with open(args.dldfolder + args.info, "a") as handle:
md5hash = md5Checksum(f_name)
handle.write(str(md5Checksum(f_name)) +"\t" + str(file_name)+ "\t" + url_mal + "\n")
handle.close
print "\n" + "Am descarcat: " + file_name,
except Exception as e:
#adding error to _errors.txt
with open(args.dldfolder + args.error, "a") as handle:
handle.write(url_mal + "\t" + str(e) + "\n")
handle.close()
pass
print "Downloading:",
def worker():
while True:
if not q.empty():
item = q.get()
dld_mal(item)
q.task_done()
for i in range(args.nr_threads):
t = threading.Thread(target=worker)
t.daemon = True
t.start()
q.join()
exit()
Warning: Downloaded files can harm your computer.
Code: Select all
Source: import os
import urllib2
import hashlib
import argparse
import random
import re
import Queue
import threading
print """
Vx Vault samples downloader
_.---.._ _.---...__
.-' /\ \ .' /\ /
`. ( ) \ / ( ) /
`. \/ .'\ /`. \/ .'
``---'' ) ( ``---''
.';.--.;`.
.' /_...._\ `.
.' `.a a.' `.
( \/ )
`.___..-'`-..___.'
\ v1.3 /
`-.____.-'
http://virii.tk http://twitter.com/ViRiiTk
"""
parser = argparse.ArgumentParser(description="Vx Vault samples downloader v1.3")
parser.add_argument("nr_samples", type=int,
help= "How many samples you want to download")
parser.add_argument("-t", "--nr_threads", metavar="threads", type=int, default=200,
help= "Threads number (Default: 200)")
parser.add_argument("-a", "--agent", default="Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0",
help= "User Agent used to download samples")
parser.add_argument("-d", "--dldfolder", default = "C:\malware\\",
help= "Local folder to download samples (Default: C:\malware\\ )")
parser.add_argument("-i", "--info", default = "_files.txt",
help = "file to store info about downloaded samples (Default: _files.txt)")
parser.add_argument("-e", "--error", default = "_errors.txt",
help = "file to store errors (Default: _errors.txt)")
parser.add_argument("-u", "--malurl", default = "_mal_url.txt",
help = "file to store malware urls (Default: _mal_url.txt)")
args = parser.parse_args()
#limit the number of download samples
if args.nr_samples > 10000:
print "You need very Very VERY many samples, 5k is enough for you"
args.nr_samples = 4999
#create download folder if not exist
if not os.path.isdir(args.dldfolder):
os.mkdir(args.dldfolder)
print "Malware samples will be downloaded to %s" %(args.dldfolder)
print "Try to download latest %i samples" %(args.nr_samples)
print "Threads: %i" %(args.nr_threads), "\n"
#construct user agents
dldagent = {'User-Agent' : args.agent}
useragent = {'User-Agent' : "Samples downloader v1.3 http://ViRii.Tk"}
#generate random string
def get_random_word(a):
    """Return a random string of ``a`` ASCII letters.

    The result is used to make local file names unique.  It relies on the
    ``random`` module, so it must not be used for anything security-sensitive.

    a -- requested length; zero or a negative value yields an empty string.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    # join() builds the string in one pass instead of repeated concatenation.
    return ''.join(random.choice(alphabet) for _ in range(a))
#md5 file
def md5Checksum(filePath):
    """Return the hexadecimal MD5 digest of the file at ``filePath``.

    The file is read in 8192-byte chunks so that large samples are never held
    in memory in one piece.  The handle is closed when the function returns
    (the previous version left it open).

    filePath -- path of the file to hash.
    Raises IOError/OSError if the file cannot be opened or read.
    """
    m = hashlib.md5()
    with open(filePath, 'rb') as fh:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for data in iter(lambda: fh.read(8192), b''):
            m.update(data)
    return m.hexdigest()
# Queue of sample addresses shared with the download workers.
q = Queue.Queue()

# Fetch the listing of the latest samples in a single request.
adresa = "http://vxvault.siri-urz.net/ViriList.php?s=0&m=" + str(args.nr_samples)
try:
    req = urllib2.Request(adresa, None, useragent)
    # Same timeout as the sample downloads, so a stalled server cannot block
    # the script forever.
    response = urllib2.urlopen(req, timeout=59)
    try:
        continut_pagina = response.read()
    finally:
        response.close()
except Exception as e:
    # Nothing can be downloaded without the listing: stop, reporting the error.
    exit(e)

# Find all malware addresses: the text of every ViriFiche.php link.
link_pattern = re.compile(r"href='ViriFiche\.php\?ID=[\d]+'>(.+)</a></TD>")
# NOTE(review): link texts containing "digits-digits" are skipped - presumably
# these are the date cells, which use the same kind of link; confirm.
date_pattern = re.compile(r"[\d]{1,2}-[\d]{1,2}")
pagina = continut_pagina.split("\r")
for i in pagina:
    match = link_pattern.search(i)
    if not match:
        continue
    temp_mal_address = match.group(1)
    if not date_pattern.search(temp_mal_address):
        # Add the malware address to the download queue.
        q.put(temp_mal_address)
#downloading malware samples
def dld_mal(url_mal):
#write in "_mal_url.txt" address of this sample
with open(args.dldfolder + args.malurl, "a") as handle:
handle.write(url_mal + "\n")
handle.close()
#get file name
file_name = url_mal.split("/")[-1]
#remove bad characters from file name
if len(file_name)==0 or re.search("\?", file_name) or re.search("\&", file_name):
file_name = "No_name" + str(get_random_word(8))
#try to download sample
try:
#check if url start with "http://
if url_mal[:7] != "http://":
url_mal = "http://" + url_mal
#construct url and set timeout
url_construct = urllib2.Request(url_mal, None, dldagent)
u = urllib2.urlopen(url_construct, timeout = 59) #timeout 1 min
# every downloaded malware will have a uniq name: "Malware_sample" + "_" + 3 random characters
f_name = args.dldfolder + str(file_name) +"_" + get_random_word(3)
#write to file
f = open(f_name, 'wb')
block_sz = 8192
while True:
buffer = u.read(block_sz)
if not buffer:
break
f.write(buffer)
f.close()
#write info to _files.txt
with open(args.dldfolder + args.info, "a") as handle:
md5hash = md5Checksum(f_name)
handle.write(str(md5Checksum(f_name)) +"\t" + str(file_name)+ "\t" + url_mal + "\n")
handle.close
print "\n" + "Am descarcat: " + file_name,
except Exception as e:
#adding error to _errors.txt
with open(args.dldfolder + args.error, "a") as handle:
handle.write(url_mal + "\t" + str(e) + "\n")
handle.close()
pass
#creating download threads
print "Downloading:",
def worker():
while True:
if not q.empty():
item = q.get()
dld_mal(item)
q.task_done()
for i in range(args.nr_threads):
t = threading.Thread(target=worker)
t.daemon = True
t.start()
q.join()
exit()
http://virii.tk/malc0de-com-samples-downloader-v3-3/
http://virii.tk/vx-vault-samples-downloader-v1-3/