3 # Convert our XML file to a JSON file as accepted by Mitre for CNA purposes
4 # as per https://github.com/CVEProject/automation-working-group/blob/master/cve_json_schema/DRAFT-JSON-file-format-v4.md
6 # ASF httpd and OpenSSL use quite similar files, so this script is designed to work with either
9 from xml.dom import minidom
11 import simplejson as json
14 from optparse import OptionParser
19 from jsonschema import validate
20 from jsonschema import Draft4Validator
23 # Specific project stuff is here
24 import vulnxml2jsonproject as cfg
# Command-line handling. Four options are declared and parsed into `options`:
#   -s/--schema     schema location, defaults to default_cve_schema
#   -i/--input      the vulnerabilities XML file (no default)
#   -c/--cve        comma separated CVE names to emit (no default)
#   -o/--outputdir  directory for the generated JSON, defaults to "."
26 # Location of CVE JSON schema (default, can use local file etc)
27 default_cve_schema = "https://raw.githubusercontent.com/CVEProject/automation-working-group/master/cve_json_schema/CVE_JSON_4.0_min_public.schema"
29 parser = OptionParser()
30 parser.add_option("-s", "--schema", help="location of schema to check (default "+default_cve_schema+")", default=default_cve_schema,dest="schema")
31 parser.add_option("-i", "--input", help="input vulnerability file vulnerabilities.xml", dest="input")
32 parser.add_option("-c", "--cve", help="comma separated list of cve names to generate a json file for (or all)", dest="cves")
33 parser.add_option("-o", "--outputdir", help="output directory for json file (default ./)", default=".", dest="outputdir")
34 (options, args) = parser.parse_args()
# NOTE(review): the condition guarding this message is not visible in this
# excerpt -- presumably it fires when --input was not supplied; confirm, and
# confirm that the script stops afterwards.
37 print "needs input file"
# Load the JSON schema named by options.schema and parse it into schema_doc.
# NOTE(review): urllib.urlopen() and the print statement below are Python 2
# only (Python 3 moved this to urllib.request.urlopen). The `import urllib`
# line is not visible in this excerpt -- confirm it exists.
43 response = urllib.urlopen(options.schema)
# NOTE(review): presumably this message sits in an exception handler around the
# urlopen() call above; the try/except lines are not visible here -- confirm.
45 print "Problem opening schema: try downloading it manually then specify it using --schema option: %s" % options.schema
# The response body is read in full and decoded as JSON.
47 schema_doc = json.loads(response.read())
# Read the whole input file as UTF-8 text, then hand it to minidom as UTF-8
# encoded bytes; `dom` is the parsed document used by the code that follows.
# NOTE(review): the `import codecs` line is not visible in this excerpt.
51 with codecs.open(options.input,"r","utf-8") as vulnfile:
52 vulns = vulnfile.read()
53 dom = minidom.parseString(vulns.encode("utf-8"))
# Walk every <issue> element of the parsed document and work out its CVE name.
55 for issue in dom.getElementsByTagName('issue'):
# NOTE(review): the statement controlled by this test is not visible in this
# excerpt -- presumably issues without a <cve> child are skipped; confirm.
56 if not issue.getElementsByTagName('cve'):
58 # ASF httpd has CVE- prefix, but OpenSSL does not, make either work
# Uses the first <cve> child's "name" attribute with every "CVE-" substring removed.
59 cvename = issue.getElementsByTagName('cve')[0].getAttribute('name').replace('CVE-','')
62 if (options.cves): # If we only want a certain list of CVEs, skip the rest
# NOTE(review): no visible line splits options.cves, so this looks like a
# substring test against the raw -c string: a name such as "2017-373" would
# also match a requested "2017-3735". Confirm whether a split happens in lines
# not shown here. The skip statement this test controls is also not visible.
63 if (not cvename in options.cves):
# Fill in the fixed header fields and the CVE_data_meta section of the record.
# NOTE(review): the statement that creates `cve` is not visible in this excerpt.
67 cve['data_type']="CVE"
68 cve['data_format']="MITRE"
69 cve['data_version']="4.0"
# ID re-adds the "CVE-" prefix to cvename; ASSIGNER comes from the project config.
70 cve['CVE_data_meta']= { "ID": "CVE-"+cvename, "ASSIGNER": cfg.config['cve_meta_assigner'], "STATE":"PUBLIC" }
71 datepublic = issue.getAttribute("public")
# Re-cuts the attribute as chars 0-3, 4-5 and 6-7 joined with "-".
# NOTE(review): assumes the attribute is formatted YYYYMMDD -- confirm. Any
# guard for a missing/empty attribute would be on a line not visible here.
73 cve['CVE_data_meta']['DATE_PUBLIC'] = datepublic[:4]+'-'+datepublic[4:6]+'-'+datepublic[6:8]
# TITLE is optional: only set when the issue has a <title> child.
74 if issue.getElementsByTagName('title'):
# Takes the first child node of the first <title>; an empty <title/> element
# would raise IndexError on childNodes[0].
75 cve['CVE_data_meta']['TITLE'] = issue.getElementsByTagName('title')[0].childNodes[0].nodeValue.strip()
# Build the plain-text description from the child nodes of the first
# <description> element (the [0] raises IndexError if the issue has none).
# NOTE(review): the initialisation of `desc` and the original nesting of these
# lines are not visible in this excerpt.
77 for d in issue.getElementsByTagName('description')[0].childNodes:
78 # if d.nodeType == d.ELEMENT_NODE:
# Each node is serialised back to XML, trimmed, and anything that looks like a
# tag (text matching '<[^<]+?>') is removed before being appended to desc.
81 desc += re.sub('<[^<]+?>', '', d.toxml().strip())
# Turn character/entity references left in the serialised XML back into text.
# NOTE(review): HTMLParser is the Python 2 module name, and the unescape()
# method was removed in Python 3.9 in favour of html.unescape(). Whether this
# runs per node or once after the loop cannot be told from this excerpt.
82 desc = HTMLParser.HTMLParser().unescape(desc)
# Problem type: the placeholder "(undefined)" unless the issue carries a
# <problemtype> element, in which case the trimmed text of the first one is used.
83 problemtype = "(undefined)"
84 if issue.getElementsByTagName('problemtype'):
85 problemtype = issue.getElementsByTagName('problemtype')[0].childNodes[0].nodeValue.strip()
86 cve['problemtype'] = { "problemtype_data": [ { "description" : [ { "lang":"eng", "value": problemtype} ] } ] }
# Impact/severity comes from one of two layouts; in both, the severity text is
# also appended to cfg.config['security_policy_url'] to form the "url" field.
# NOTE(review): the tests guarding each assignment below are not visible in
# this excerpt -- presumably each runs only when its element list is non-empty.
87 impact = issue.getElementsByTagName('impact') # openssl does it like this
# Severity is read from the "severity" attribute of the first <impact>.
89 cve['impact'] = [ { "lang":"eng", "value":impact[0].getAttribute('severity'), "url":cfg.config['security_policy_url']+impact[0].getAttribute('severity') } ]
90 impact = issue.getElementsByTagName('severity') # httpd does it like this
# Severity is the text of the first <severity>; unlike problemtype above it is
# used without strip(), so surrounding whitespace would end up in value and url.
92 cve['impact'] = [ { "lang":"eng", "value":impact[0].childNodes[0].nodeValue, "url":cfg.config['security_policy_url']+impact[0].childNodes[0].nodeValue } ]
94 # Create the list of credits
# Each credit is {"lang": "eng", "value": <text>} with every run of newlines
# and spaces collapsed to a single space.
# NOTE(review): the initialisation of `credit` and the statement that stores it
# on the record are not visible in this excerpt.
97 for reported in issue.getElementsByTagName('reported'): # openssl style credits
# Text comes from the "source" attribute.
98 credit.append( { "lang":"eng", "value":re.sub('[\n ]+',' ', reported.getAttribute("source"))} )
99 for reported in issue.getElementsByTagName('acknowledgements'): # ASF httpd style credits
# Text comes from the element's first child node, trimmed; an empty
# <acknowledgements/> element would raise IndexError on childNodes[0].
100 credit.append( { "lang":"eng", "value":re.sub('[\n ]+',' ', reported.childNodes[0].nodeValue.strip())} )
104 # Create the list of references
# Every reference is emitted as {"url": u, "name": u, "refsource": "CONFIRM"}.
# NOTE(review): the initialisation of `refs` is not visible in this excerpt.
107 for adv in issue.getElementsByTagName('advisory'):
108 url = adv.getAttribute("url")
# Anything not starting with "htt" is treated as relative and gets the
# project's default_reference_prefix prepended.
109 if (not url.startswith("htt")):
110 url = cfg.config['default_reference_prefix']+url
111 refs.append({"url":url,"name":url,"refsource":"CONFIRM"})
112 for git in issue.getElementsByTagName('git'): # openssl style references to git
# The link is the configured git_prefix followed by the "hash" attribute.
113 url = cfg.config['git_prefix']+git.getAttribute("hash")
114 refs.append({"url":url,"name":url,"refsource":"CONFIRM"})
115 if cfg.config['project'] == 'httpd': # ASF httpd has no references so fake them
116 for fixed in issue.getElementsByTagName('fixed'):
# Drops the last dotted component of the version and joins the rest with no
# separator, e.g. "2.4.33" -> "24".
117 base = "".join(fixed.getAttribute("version").split('.')[:-1])
# The anchor re-adds the "CVE-" prefix to cvename.
118 refurl = cfg.config['default_reference']+base+".html#CVE-"+cvename
119 refs.append({"url":refurl,"name":refurl,"refsource":"CONFIRM"})
121 cve['references'] = { "reference_data": refs }
123 # Create the "affected products" list
# NOTE(review): the initialisation of `vv` is not visible in this excerpt.
126 for affects in issue.getElementsByTagName('fixed'): # OpenSSL and httpd since April 2018 does it this way
# One free-text line per <fixed> element. The affected-version part is produced
# by cfg.merge_affects(issue, <"base" attribute>), a project helper whose
# behaviour is not visible here.
127 text = "Fixed in %s %s (Affected %s)" %(cfg.config['product_name'],affects.getAttribute('version'),cfg.merge_affects(issue,affects.getAttribute("base")))
128 # Let's condense into a list form since the format of this field is 'free text' at the moment, not machine readable (as per mail with George Theall)
129 vv.append({"version_value":text})
# NOTE(review): the statement this comment introduces is not visible in this
# excerpt; the retired code below does `desc += " "+text+"."` at the same
# point, so presumably the live code does likewise -- confirm.
130 # Mitre want the fixed/affected versions in the text too
# Retired httpd variant that read the version from a "fixed" attribute on <issue>.
133 # if issue.getAttribute('fixed'): # httpd used to do it this way
134 # base = ".".join(issue.getAttribute("fixed").split('.')[:-1])+"."
135 # text = "Fixed in %s %s (Affected %s)" %(cfg.config['product_name'],issue.getAttribute('fixed'),cfg.merge_affects(issue,base))
136 # vv.append({"version_value":text})
137 # # Mitre want the fixed/affected versions in the text too
138 # desc += " "+text+"."
# A single vendor holding a single product; all collected version strings hang
# off that product's version_data.
140 cve['affects'] = { "vendor" : { "vendor_data" : [ { "vendor_name": cfg.config['vendor_name'], "product": { "product_data" : [ { "product_name": cfg.config['product_name'], "version": { "version_data" : vv}}]}}]}}
142 # Mitre want newlines and excess spaces stripped
# Every run of newlines and spaces becomes one space; leading/trailing
# whitespace is therefore reduced to a single space, not removed.
143 desc = re.sub('[\n ]+',' ', desc)
144 cve['description'] = { "description_data": [ { "lang":"eng", "value": desc} ] }
# Write one record to <outputdir>/<CVE id>.json and check it against the schema.
# NOTE(review): here `issue` is indexed like the record dict built earlier, not
# like a DOM element, so presumably a loop on a line not visible in this
# excerpt rebinds `issue` over the generated records -- confirm.
148 fn = issue['CVE_data_meta']['ID'] + ".json"
# NOTE(review): opened without a `with` block and no close() is visible in this
# excerpt -- confirm the handle is closed so the data is flushed before exit.
# The path is built by plain string concatenation with "/".
152 f = codecs.open(options.outputdir+"/"+fn, 'w', 'utf-8')
# Stable, human-readable output: keys sorted, 4-space indent.
153 f.write(json.dumps(issue, sort_keys=True, indent=4, separators=(',',': ')))
154 print "wrote %s" %(options.outputdir+"/"+fn)
# NOTE(review): the enclosing try (and the test that decides whether a schema
# is available) are on lines not visible in this excerpt.
158 validate(issue, schema_doc)
159 print "%s passed validation" % (fn)
# NOTE(review): the visible imports are `from jsonschema import validate` and
# `from jsonschema import Draft4Validator`, neither of which binds the name
# `jsonschema`. Unless `import jsonschema` exists on a line not shown, this
# clause raises NameError as soon as validate() raises anything.
160 except jsonschema.exceptions.ValidationError as incorrect:
# validate() reports a single error, so the record is re-checked with an
# explicit validator and every error is listed, ordered by its path.
161 v = Draft4Validator(schema_doc)
162 errors = sorted(v.iter_errors(issue), key=lambda e: e.path)
# NOTE(review): `error` is presumably the variable of a loop over `errors`
# whose header is not visible in this excerpt.
164 print "%s did not pass validation: %s" % (fn,str(error.message))
# Printed on the (not visible) branch taken when no schema is available.
166 print "%s skipping validation, no schema defined" %(fn)