This script was used for generating the website search, which now just
calls out to Google. Remove it.
Signed-off-by: Cole Robinson <crobinso(a)redhat.com>
---
docs/index.py | 1266 -------------------------------------------------
1 file changed, 1266 deletions(-)
delete mode 100755 docs/index.py
diff --git a/docs/index.py b/docs/index.py
deleted file mode 100755
index 0d07ca4d05..0000000000
--- a/docs/index.py
+++ /dev/null
@@ -1,1266 +0,0 @@
-#!/usr/bin/env python2
-#
-# imports the API description and fills up a database with
-# name relevance to modules, functions or web pages
-#
-# Operation needed:
-# =================
-#
-# install mysqld, the python wrappers for mysql and libxml2, start mysqld
-# - mysql-server
-# - mysql
-# - php-mysql
-# - MySQL-python
-# Change the root passwd of mysql:
-# mysqladmin -u root password new_password
-# Create the new database libvir
-# mysqladmin -p create libvir
-# Create a database user 'veillard' and give him password access
-# change veillard and abcde with the right user name and passwd
-# mysql -p
-# password:
-# mysql> GRANT ALL PRIVILEGES ON libvir TO veillard@localhost
-# IDENTIFIED BY 'abcde' WITH GRANT OPTION;
-# mysql> GRANT ALL PRIVILEGES ON libvir.* TO veillard@localhost
-# IDENTIFIED BY 'abcde' WITH GRANT OPTION;
-#
-# As the user check the access:
-# mysql -p libvir
-# Enter password:
-# Welcome to the MySQL monitor....
-# mysql> use libvir
-# Database changed
-# mysql> quit
-# Bye
-#
-# Then run the script in the doc subdir, it will create the symbols and
-# word tables and populate them with information extracted from
-# the libvirt-api.xml API description, and make then accessible read-only
-# by nobody@loaclhost the user expected to be Apache's one
-#
-# On the Apache configuration, make sure you have php support enabled
-#
-
-import MySQLdb
-import libxml2
-import sys
-import string
-import os
-
-#
-# We are not interested in parsing errors here
-#
-def callback(ctx, str):
- return
-libxml2.registerErrorHandler(callback, None)
-
-#
-# The dictionary of tables required and the SQL command needed
-# to create them
-#
-TABLES = {
- "symbols": """CREATE TABLE symbols (
- name varchar(255) BINARY NOT NULL,
- module varchar(255) BINARY NOT NULL,
- type varchar(25) NOT NULL,
- descr varchar(255),
- UNIQUE KEY name (name),
- KEY module (module))""",
- "words": """CREATE TABLE words (
- name varchar(50) BINARY NOT NULL,
- symbol varchar(255) BINARY NOT NULL,
- relevance int,
- KEY name (name),
- KEY symbol (symbol),
- UNIQUE KEY ID (name, symbol))""",
- "wordsHTML": """CREATE TABLE wordsHTML (
- name varchar(50) BINARY NOT NULL,
- resource varchar(255) BINARY NOT NULL,
- section varchar(255),
- id varchar(50),
- relevance int,
- KEY name (name),
- KEY resource (resource),
- UNIQUE KEY ref (name, resource))""",
- "wordsArchive": """CREATE TABLE wordsArchive (
- name varchar(50) BINARY NOT NULL,
- ID int(11) NOT NULL,
- relevance int,
- KEY name (name),
- UNIQUE KEY ref (name, ID))""",
- "pages": """CREATE TABLE pages (
- resource varchar(255) BINARY NOT NULL,
- title varchar(255) BINARY NOT NULL,
- UNIQUE KEY name (resource))""",
- "archives": """CREATE TABLE archives (
- ID int(11) NOT NULL auto_increment,
- resource varchar(255) BINARY NOT NULL,
- title varchar(255) BINARY NOT NULL,
- UNIQUE KEY id (ID,resource(255)),
- INDEX (ID),
- INDEX (resource))""",
- "Queries": """CREATE TABLE Queries (
- ID int(11) NOT NULL auto_increment,
- Value varchar(50) NOT NULL,
- Count int(11) NOT NULL,
- UNIQUE KEY id (ID,Value(35)),
- INDEX (ID))""",
- "AllQueries": """CREATE TABLE AllQueries (
- ID int(11) NOT NULL auto_increment,
- Value varchar(50) NOT NULL,
- Count int(11) NOT NULL,
- UNIQUE KEY id (ID,Value(35)),
- INDEX (ID))""",
-}
-
-#
-# The XML API description file to parse
-#
-API = "libvirt-api.xml"
-DB = None
-
-#########################################################################
-# #
-# MySQL database interfaces #
-# #
-#########################################################################
-def createTable(db, name):
- global TABLES
-
- if db is None:
- return -1
- if name is None:
- return -1
- c = db.cursor()
-
- ret = c.execute("DROP TABLE IF EXISTS %s" % (name))
- if ret == 1:
- print "Removed table %s" % (name)
- print "Creating table %s" % (name)
- try:
- ret = c.execute(TABLES[name])
- except:
- print "Failed to create table %s" % (name)
- return -1
- return ret
-
-def checkTables(db, verbose=1):
- global TABLES
-
- if db is None:
- return -1
- c = db.cursor()
- nbtables = c.execute("show tables")
- if verbose:
- print "Found %d tables" % (nbtables)
- tables = {}
- i = 0
- while i < nbtables:
- l = c.fetchone()
- name = l[0]
- tables[name] = {}
- i = i + 1
-
- for table in TABLES.keys():
- if not tables.has_key(table):
- print "table %s missing" % (table)
- createTable(db, table)
- try:
- ret = c.execute("SELECT count(*) from %s" % table)
- row = c.fetchone()
- if verbose:
- print "Table %s contains %d records" % (table, row[0])
- except:
- print "Troubles with table %s: repairing" % (table)
- ret = c.execute("repair table %s" % table)
- print "repairing returned %d" % (ret)
- ret = c.execute("SELECT count(*) from %s" % table)
- row = c.fetchone()
- print "Table %s contains %d records" % (table, row[0])
- if verbose:
- print "checkTables finished"
-
- # make sure apache can access the tables read-only
- try:
- ret = c.execute("GRANT SELECT ON libvir.* TO nobody@localhost")
- ret = c.execute("GRANT INSERT,SELECT,UPDATE ON libvir.Queries TO
nobody@localhost")
- except:
- pass
- return 0
-
-def openMySQL(db="libvir", passwd=None, verbose=1):
- global DB
-
- if passwd is None:
- try:
- passwd = os.environ["MySQL_PASS"]
- except:
- print "No password available, set environment MySQL_PASS"
- sys.exit(1)
-
- DB = MySQLdb.connect(passwd=passwd, db=db)
- if DB is None:
- return -1
- ret = checkTables(DB, verbose)
- return ret
-
-def updateWord(name, symbol, relevance):
- global DB
-
- if DB is None:
- openMySQL()
- if DB is None:
- return -1
- if name is None:
- return -1
- if symbol is None:
- return -1
-
- c = DB.cursor()
- try:
- ret = c.execute(
-"""INSERT INTO words (name, symbol, relevance) VALUES
('%s','%s', %d)""" %
- (name, symbol, relevance))
- except:
- try:
- ret = c.execute(
- """UPDATE words SET relevance = %d where name = '%s' and
symbol = '%s'""" %
- (relevance, name, symbol))
- except:
- print "Update word (%s, %s, %s) failed command" % (name, symbol,
relevance)
- print "UPDATE words SET relevance = %d where name = '%s' and
symbol = '%s'" % (relevance, name, symbol)
- print sys.exc_type, sys.exc_value
- return -1
-
- return ret
-
-def updateSymbol(name, module, type, desc):
- global DB
-
- updateWord(name, name, 50)
- if DB is None:
- openMySQL()
- if DB is None:
- return -1
- if name is None:
- return -1
- if module is None:
- return -1
- if type is None:
- return -1
-
- try:
- desc = string.replace(desc, "'", " ")
- l = string.split(desc, ".")
- desc = l[0]
- desc = desc[0:99]
- except:
- desc = ""
-
- c = DB.cursor()
- try:
- ret = c.execute(
-"""INSERT INTO symbols (name, module, type, descr) VALUES
('%s','%s', '%s', '%s')""" %
- (name, module, type, desc))
- except:
- try:
- ret = c.execute(
-"""UPDATE symbols SET module='%s', type='%s',
descr='%s' where name='%s'""" %
- (module, type, desc, name))
- except:
- print "Update symbol (%s, %s, %s) failed command" % (name, module,
type)
- print """UPDATE symbols SET module='%s',
type='%s', descr='%s' where name='%s'""" % (module,
type, desc, name)
- print sys.exc_type, sys.exc_value
- return -1
-
- return ret
-
-def addFunction(name, module, desc=""):
- return updateSymbol(name, module, 'function', desc)
-
-def addMacro(name, module, desc=""):
- return updateSymbol(name, module, 'macro', desc)
-
-def addEnum(name, module, desc=""):
- return updateSymbol(name, module, 'enum', desc)
-
-def addStruct(name, module, desc=""):
- return updateSymbol(name, module, 'struct', desc)
-
-def addConst(name, module, desc=""):
- return updateSymbol(name, module, 'const', desc)
-
-def addType(name, module, desc=""):
- return updateSymbol(name, module, 'type', desc)
-
-def addFunctype(name, module, desc=""):
- return updateSymbol(name, module, 'functype', desc)
-
-def addPage(resource, title):
- global DB
-
- if DB is None:
- openMySQL()
- if DB is None:
- return -1
- if resource is None:
- return -1
-
- c = DB.cursor()
- try:
- ret = c.execute(
- """INSERT INTO pages (resource, title) VALUES
('%s','%s')""" %
- (resource, title))
- except:
- try:
- ret = c.execute(
- """UPDATE pages SET title='%s' WHERE
resource='%s'""" %
- (title, resource))
- except:
- print "Update symbol (%s, %s, %s) failed command" % (name, module,
type)
- print """UPDATE pages SET title='%s' WHERE
resource='%s'""" % (title, resource)
- print sys.exc_type, sys.exc_value
- return -1
-
- return ret
-
-def updateWordHTML(name, resource, desc, id, relevance):
- global DB
-
- if DB is None:
- openMySQL()
- if DB is None:
- return -1
- if name is None:
- return -1
- if resource is None:
- return -1
- if id is None:
- id = ""
- if desc is None:
- desc = ""
- else:
- try:
- desc = string.replace(desc, "'", " ")
- desc = desc[0:99]
- except:
- desc = ""
-
- c = DB.cursor()
- try:
- ret = c.execute(
-"""INSERT INTO wordsHTML (name, resource, section, id, relevance) VALUES
('%s','%s', '%s', '%s', '%d')""" %
- (name, resource, desc, id, relevance))
- except:
- try:
- ret = c.execute(
-"""UPDATE wordsHTML SET section='%s', id='%s',
relevance='%d' where name='%s' and resource='%s'"""
%
- (desc, id, relevance, name, resource))
- except:
- print "Update symbol (%s, %s, %d) failed command" % (name,
resource, relevance)
- print """UPDATE wordsHTML SET section='%s',
id='%s', relevance='%d' where name='%s' and
resource='%s'""" % (desc, id, relevance, name, resource)
- print sys.exc_type, sys.exc_value
- return -1
-
- return ret
-
-def checkXMLMsgArchive(url):
- global DB
-
- if DB is None:
- openMySQL()
- if DB is None:
- return -1
- if url is None:
- return -1
-
- c = DB.cursor()
- try:
- ret = c.execute(
- """SELECT ID FROM archives WHERE
resource='%s'""" % (url))
- row = c.fetchone()
- if row is None:
- return -1
- except:
- return -1
-
- return row[0]
-
-def addXMLMsgArchive(url, title):
- global DB
-
- if DB is None:
- openMySQL()
- if DB is None:
- return -1
- if url is None:
- return -1
- if title is None:
- title = ""
- else:
- title = string.replace(title, "'", " ")
- title = title[0:99]
-
- c = DB.cursor()
- try:
- cmd = """INSERT INTO archives (resource, title) VALUES
('%s','%s')""" % (url, title)
- ret = c.execute(cmd)
- cmd = """SELECT ID FROM archives WHERE
resource='%s'""" % (url)
- ret = c.execute(cmd)
- row = c.fetchone()
- if row is None:
- print "addXMLMsgArchive failed to get the ID: %s" % (url)
- return -1
- except:
- print "addXMLMsgArchive failed command: %s" % (cmd)
- return -1
-
- return((int)(row[0]))
-
-def updateWordArchive(name, id, relevance):
- global DB
-
- if DB is None:
- openMySQL()
- if DB is None:
- return -1
- if name is None:
- return -1
- if id is None:
- return -1
-
- c = DB.cursor()
- try:
- ret = c.execute(
-"""INSERT INTO wordsArchive (name, id, relevance) VALUES ('%s',
'%d', '%d')""" %
- (name, id, relevance))
- except:
- try:
- ret = c.execute(
-"""UPDATE wordsArchive SET relevance='%d' where name='%s'
and ID='%d'""" %
- (relevance, name, id))
- except:
- print "Update word archive (%s, %d, %d) failed command" % (name,
id, relevance)
- print """UPDATE wordsArchive SET relevance='%d' where
name='%s' and ID='%d'""" % (relevance, name, id)
- print sys.exc_type, sys.exc_value
- return -1
-
- return ret
-
-#########################################################################
-# #
-# Word dictionary and analysis routines #
-# #
-#########################################################################
-
-#
-# top 100 english word without the one len < 3 + own set
-#
-dropWords = {
- 'the':0, 'this':0, 'can':0, 'man':0, 'had':0,
'him':0, 'only':0,
- 'and':0, 'not':0, 'been':0, 'other':0,
'even':0, 'are':0, 'was':0,
- 'new':0, 'most':0, 'but':0, 'when':0,
'some':0, 'made':0, 'from':0,
- 'who':0, 'could':0, 'after':0, 'that':0,
'will':0, 'time':0, 'also':0,
- 'have':0, 'more':0, 'these':0, 'did':0,
'was':0, 'two':0, 'many':0,
- 'they':0, 'may':0, 'before':0, 'for':0,
'which':0, 'out':0, 'then':0,
- 'must':0, 'one':0, 'through':0, 'with':0,
'you':0, 'said':0,
- 'first':0, 'back':0, 'were':0, 'what':0,
'any':0, 'years':0, 'his':0,
- 'her':0, 'where':0, 'all':0, 'its':0,
'now':0, 'much':0, 'she':0,
- 'about':0, 'such':0, 'your':0, 'there':0,
'into':0, 'like':0, 'may':0,
- 'would':0, 'than':0, 'our':0, 'well':0,
'their':0, 'them':0, 'over':0,
- 'down':0,
- 'net':0, 'www':0, 'bad':0, 'Okay':0, 'bin':0,
'cur':0,
-}
-
-wordsDict = {}
-wordsDictHTML = {}
-wordsDictArchive = {}
-
-def cleanupWordsString(str):
- str = string.replace(str, ".", " ")
- str = string.replace(str, "!", " ")
- str = string.replace(str, "?", " ")
- str = string.replace(str, ",", " ")
- str = string.replace(str, "'", " ")
- str = string.replace(str, '"', " ")
- str = string.replace(str, ";", " ")
- str = string.replace(str, "(", " ")
- str = string.replace(str, ")", " ")
- str = string.replace(str, "{", " ")
- str = string.replace(str, "}", " ")
- str = string.replace(str, "<", " ")
- str = string.replace(str, ">", " ")
- str = string.replace(str, "=", " ")
- str = string.replace(str, "/", " ")
- str = string.replace(str, "*", " ")
- str = string.replace(str, ":", " ")
- str = string.replace(str, "#", " ")
- str = string.replace(str, "\\", " ")
- str = string.replace(str, "\n", " ")
- str = string.replace(str, "\r", " ")
- str = string.replace(str, "\xc2", " ")
- str = string.replace(str, "\xa0", " ")
- return str
-
-def cleanupDescrString(str):
- str = string.replace(str, "'", " ")
- str = string.replace(str, "\n", " ")
- str = string.replace(str, "\r", " ")
- str = string.replace(str, "\xc2", " ")
- str = string.replace(str, "\xa0", " ")
- l = string.split(str)
- str = string.join(str)
- return str
-
-def splitIdentifier(str):
- ret = []
- while str != "":
- cur = string.lower(str[0])
- str = str[1:]
- if ((cur < 'a') or (cur > 'z')):
- continue
- while (str != "") and (str[0] >= 'A') and (str[0] <=
'Z'):
- cur = cur + string.lower(str[0])
- str = str[1:]
- while (str != "") and (str[0] >= 'a') and (str[0] <=
'z'):
- cur = cur + str[0]
- str = str[1:]
- while (str != "") and (str[0] >= '0') and (str[0] <=
'9'):
- str = str[1:]
- ret.append(cur)
- return ret
-
-def addWord(word, module, symbol, relevance):
- global wordsDict
-
- if word is None or len(word) < 3:
- return -1
- if module is None or symbol is None:
- return -1
- if dropWords.has_key(word):
- return 0
- if ord(word[0]) > 0x80:
- return 0
-
- if wordsDict.has_key(word):
- d = wordsDict[word]
- if d is None:
- return 0
- if len(d) > 500:
- wordsDict[word] = None
- return 0
- try:
- relevance = relevance + d[(module, symbol)]
- except:
- pass
- else:
- wordsDict[word] = {}
- wordsDict[word][(module, symbol)] = relevance
- return relevance
-
-def addString(str, module, symbol, relevance):
- if str is None or len(str) < 3:
- return -1
- ret = 0
- str = cleanupWordsString(str)
- l = string.split(str)
- for word in l:
- if len(word) > 2:
- ret = ret + addWord(word, module, symbol, 5)
-
- return ret
-
-def addWordHTML(word, resource, id, section, relevance):
- global wordsDictHTML
-
- if word is None or len(word) < 3:
- return -1
- if resource is None or section is None:
- return -1
- if dropWords.has_key(word):
- return 0
- if ord(word[0]) > 0x80:
- return 0
-
- section = cleanupDescrString(section)
-
- if wordsDictHTML.has_key(word):
- d = wordsDictHTML[word]
- if d is None:
- print "skipped %s" % (word)
- return 0
- try:
- (r,i,s) = d[resource]
- if i is not None:
- id = i
- if s is not None:
- section = s
- relevance = relevance + r
- except:
- pass
- else:
- wordsDictHTML[word] = {}
- d = wordsDictHTML[word]
- d[resource] = (relevance, id, section)
- return relevance
-
-def addStringHTML(str, resource, id, section, relevance):
- if str is None or len(str) < 3:
- return -1
- ret = 0
- str = cleanupWordsString(str)
- l = string.split(str)
- for word in l:
- if len(word) > 2:
- try:
- r = addWordHTML(word, resource, id, section, relevance)
- if r < 0:
- print "addWordHTML failed: %s %s" % (word, resource)
- ret = ret + r
- except:
- print "addWordHTML failed: %s %s %d" % (word, resource,
relevance)
- print sys.exc_type, sys.exc_value
-
- return ret
-
-def addWordArchive(word, id, relevance):
- global wordsDictArchive
-
- if word is None or len(word) < 3:
- return -1
- if id is None or id == -1:
- return -1
- if dropWords.has_key(word):
- return 0
- if ord(word[0]) > 0x80:
- return 0
-
- if wordsDictArchive.has_key(word):
- d = wordsDictArchive[word]
- if d is None:
- print "skipped %s" % (word)
- return 0
- try:
- r = d[id]
- relevance = relevance + r
- except:
- pass
- else:
- wordsDictArchive[word] = {}
- d = wordsDictArchive[word]
- d[id] = relevance
- return relevance
-
-def addStringArchive(str, id, relevance):
- if str is None or len(str) < 3:
- return -1
- ret = 0
- str = cleanupWordsString(str)
- l = string.split(str)
- for word in l:
- i = len(word)
- if i > 2:
- try:
- r = addWordArchive(word, id, relevance)
- if r < 0:
- print "addWordArchive failed: %s %s" % (word, id)
- else:
- ret = ret + r
- except:
- print "addWordArchive failed: %s %s %d" % (word, id,
relevance)
- print sys.exc_type, sys.exc_value
- return ret
-
-#########################################################################
-# #
-# XML API description analysis #
-# #
-#########################################################################
-
-def loadAPI(filename):
- doc = libxml2.parseFile(filename)
- print "loaded %s" % (filename)
- return doc
-
-def foundExport(file, symbol):
- if file is None:
- return 0
- if symbol is None:
- return 0
- addFunction(symbol, file)
- l = splitIdentifier(symbol)
- for word in l:
- addWord(word, file, symbol, 10)
- return 1
-
-def analyzeAPIFile(top):
- count = 0
- name = top.prop("name")
- cur = top.children
- while cur is not None:
- if cur.type == 'text':
- cur = cur.next
- continue
- if cur.name == "exports":
- count = count + foundExport(name, cur.prop("symbol"))
- else:
- print "unexpected element %s in API doc <file
name='%s'>" % (name)
- cur = cur.next
- return count
-
-def analyzeAPIFiles(top):
- count = 0
- cur = top.children
-
- while cur is not None:
- if cur.type == 'text':
- cur = cur.next
- continue
- if cur.name == "file":
- count = count + analyzeAPIFile(cur)
- else:
- print "unexpected element %s in API doc <files>" %
(cur.name)
- cur = cur.next
- return count
-
-def analyzeAPIEnum(top):
- file = top.prop("file")
- if file is None:
- return 0
- symbol = top.prop("name")
- if symbol is None:
- return 0
-
- addEnum(symbol, file)
- l = splitIdentifier(symbol)
- for word in l:
- addWord(word, file, symbol, 10)
-
- return 1
-
-def analyzeAPIConst(top):
- file = top.prop("file")
- if file is None:
- return 0
- symbol = top.prop("name")
- if symbol is None:
- return 0
-
- addConst(symbol, file)
- l = splitIdentifier(symbol)
- for word in l:
- addWord(word, file, symbol, 10)
-
- return 1
-
-def analyzeAPIType(top):
- file = top.prop("file")
- if file is None:
- return 0
- symbol = top.prop("name")
- if symbol is None:
- return 0
-
- addType(symbol, file)
- l = splitIdentifier(symbol)
- for word in l:
- addWord(word, file, symbol, 10)
- return 1
-
-def analyzeAPIFunctype(top):
- file = top.prop("file")
- if file is None:
- return 0
- symbol = top.prop("name")
- if symbol is None:
- return 0
-
- addFunctype(symbol, file)
- l = splitIdentifier(symbol)
- for word in l:
- addWord(word, file, symbol, 10)
- return 1
-
-def analyzeAPIStruct(top):
- file = top.prop("file")
- if file is None:
- return 0
- symbol = top.prop("name")
- if symbol is None:
- return 0
-
- addStruct(symbol, file)
- l = splitIdentifier(symbol)
- for word in l:
- addWord(word, file, symbol, 10)
-
- info = top.prop("info")
- if info is not None:
- info = string.replace(info, "'", " ")
- info = string.strip(info)
- l = string.split(info)
- for word in l:
- if len(word) > 2:
- addWord(word, file, symbol, 5)
- return 1
-
-def analyzeAPIMacro(top):
- file = top.prop("file")
- if file is None:
- return 0
- symbol = top.prop("name")
- if symbol is None:
- return 0
- symbol = string.replace(symbol, "'", " ")
- symbol = string.strip(symbol)
-
- info = None
- cur = top.children
- while cur is not None:
- if cur.type == 'text':
- cur = cur.next
- continue
- if cur.name == "info":
- info = cur.content
- break
- cur = cur.next
-
- l = splitIdentifier(symbol)
- for word in l:
- addWord(word, file, symbol, 10)
-
- if info is None:
- addMacro(symbol, file)
- print "Macro %s description has no <info>" % (symbol)
- return 0
-
- info = string.replace(info, "'", " ")
- info = string.strip(info)
- addMacro(symbol, file, info)
- l = string.split(info)
- for word in l:
- if len(word) > 2:
- addWord(word, file, symbol, 5)
- return 1
-
-def analyzeAPIFunction(top):
- file = top.prop("file")
- if file is None:
- return 0
- symbol = top.prop("name")
- if symbol is None:
- return 0
-
- symbol = string.replace(symbol, "'", " ")
- symbol = string.strip(symbol)
- info = None
- cur = top.children
- while cur is not None:
- if cur.type == 'text':
- cur = cur.next
- continue
- if cur.name == "info":
- info = cur.content
- elif cur.name == "return":
- rinfo = cur.prop("info")
- if rinfo is not None:
- rinfo = string.replace(rinfo, "'", " ")
- rinfo = string.strip(rinfo)
- addString(rinfo, file, symbol, 7)
- elif cur.name == "arg":
- ainfo = cur.prop("info")
- if ainfo is not None:
- ainfo = string.replace(ainfo, "'", " ")
- ainfo = string.strip(ainfo)
- addString(ainfo, file, symbol, 5)
- name = cur.prop("name")
- if name is not None:
- name = string.replace(name, "'", " ")
- name = string.strip(name)
- addWord(name, file, symbol, 7)
- cur = cur.next
- if info is None:
- print "Function %s description has no <info>" % (symbol)
- addFunction(symbol, file, "")
- else:
- info = string.replace(info, "'", " ")
- info = string.strip(info)
- addFunction(symbol, file, info)
- addString(info, file, symbol, 5)
-
- l = splitIdentifier(symbol)
- for word in l:
- addWord(word, file, symbol, 10)
-
- return 1
-
-def analyzeAPISymbols(top):
- count = 0
- cur = top.children
-
- while cur is not None:
- if cur.type == 'text':
- cur = cur.next
- continue
- if cur.name == "macro":
- count = count + analyzeAPIMacro(cur)
- elif cur.name == "function":
- count = count + analyzeAPIFunction(cur)
- elif cur.name == "const":
- count = count + analyzeAPIConst(cur)
- elif cur.name == "typedef":
- count = count + analyzeAPIType(cur)
- elif cur.name == "struct":
- count = count + analyzeAPIStruct(cur)
- elif cur.name == "enum":
- count = count + analyzeAPIEnum(cur)
- elif cur.name == "functype":
- count = count + analyzeAPIFunctype(cur)
- else:
- print "unexpected element %s in API doc <files>" %
(cur.name)
- cur = cur.next
- return count
-
-def analyzeAPI(doc):
- count = 0
- if doc is None:
- return -1
- root = doc.getRootElement()
- if root.name != "api":
- print "Unexpected root name"
- return -1
- cur = root.children
- while cur is not None:
- if cur.type == 'text':
- cur = cur.next
- continue
- if cur.name == "files":
- pass
-# count = count + analyzeAPIFiles(cur)
- elif cur.name == "symbols":
- count = count + analyzeAPISymbols(cur)
- else:
- print "unexpected element %s in API doc" % (cur.name)
- cur = cur.next
- return count
-
-#########################################################################
-# #
-# Web pages parsing and analysis #
-# #
-#########################################################################
-
-import glob
-
-def analyzeHTMLText(doc, resource, p, section, id):
- words = 0
- try:
- content = p.content
- words = words + addStringHTML(content, resource, id, section, 5)
- except:
- return -1
- return words
-
-def analyzeHTMLPara(doc, resource, p, section, id):
- words = 0
- try:
- content = p.content
- words = words + addStringHTML(content, resource, id, section, 5)
- except:
- return -1
- return words
-
-def analyzeHTMLPre(doc, resource, p, section, id):
- words = 0
- try:
- content = p.content
- words = words + addStringHTML(content, resource, id, section, 5)
- except:
- return -1
- return words
-
-def analyzeHTML(doc, resource, p, section, id):
- words = 0
- try:
- content = p.content
- words = words + addStringHTML(content, resource, id, section, 5)
- except:
- return -1
- return words
-
-def analyzeHTML(doc, resource):
- para = 0
- ctxt = doc.xpathNewContext()
- try:
- res = ctxt.xpathEval("//head/title")
- title = res[0].content
- except:
- title = "Page %s" % (resource)
- addPage(resource, title)
- try:
- items = ctxt.xpathEval("//h1 | //h2 | //h3 | //text()")
- section = title
- id = ""
- for item in items:
- if item.name == 'h1' or item.name == 'h2' or item.name ==
'h3':
- section = item.content
- if item.prop("id"):
- id = item.prop("id")
- elif item.prop("name"):
- id = item.prop("name")
- elif item.type == 'text':
- analyzeHTMLText(doc, resource, item, section, id)
- para = para + 1
- elif item.name == 'p':
- analyzeHTMLPara(doc, resource, item, section, id)
- para = para + 1
- elif item.name == 'pre':
- analyzeHTMLPre(doc, resource, item, section, id)
- para = para + 1
- else:
- print "Page %s, unexpected %s element" % (resource, item.name)
- except:
- print "Page %s: problem analyzing" % (resource)
- print sys.exc_type, sys.exc_value
-
- return para
-
-def analyzeHTMLPages():
- ret = 0
- HTMLfiles = glob.glob("*.html") + glob.glob("tutorial/*.html") +
\
- glob.glob("CIM/*.html") + glob.glob("ocaml/*.html") +
\
- glob.glob("ruby/*.html")
- for html in HTMLfiles:
- if html[0:3] == "API":
- continue
- if html == "xml.html":
- continue
- try:
- doc = libxml2.parseFile(html)
- except:
- doc = libxml2.htmlParseFile(html, None)
- try:
- res = analyzeHTML(doc, html)
- print "Parsed %s: %d paragraphs" % (html, res)
- ret = ret + 1
- except:
- print "could not parse %s" % (html)
- return ret
-
-#########################################################################
-# #
-# Mail archives parsing and analysis #
-# #
-#########################################################################
-
-import time
-
-def getXMLDateArchive(t=None):
- if t is None:
- t = time.time()
- T = time.gmtime(t)
- month = time.strftime("%B", T)
- year = T[0]
- url = "http://www.redhat.com/archives/libvir-list/%d-%s/date.html" % (year,
month)
- return url
-
-def scanXMLMsgArchive(url, title, force=0):
- if url is None or title is None:
- return 0
-
- ID = checkXMLMsgArchive(url)
- if force == 0 and ID != -1:
- return 0
-
- if ID == -1:
- ID = addXMLMsgArchive(url, title)
- if ID == -1:
- return 0
-
- try:
- print "Loading %s" % (url)
- doc = libxml2.htmlParseFile(url, None)
- except:
- doc = None
- if doc is None:
- print "Failed to parse %s" % (url)
- return 0
-
- addStringArchive(title, ID, 20)
- ctxt = doc.xpathNewContext()
- texts = ctxt.xpathEval("//pre//text()")
- for text in texts:
- addStringArchive(text.content, ID, 5)
-
- return 1
-
-def scanXMLDateArchive(t=None, force=0):
- global wordsDictArchive
-
- wordsDictArchive = {}
-
- url = getXMLDateArchive(t)
- print "loading %s" % (url)
- try:
- doc = libxml2.htmlParseFile(url, None)
- except:
- doc = None
- if doc is None:
- print "Failed to parse %s" % (url)
- return -1
- ctxt = doc.xpathNewContext()
- anchors = ctxt.xpathEval("//a[@href]")
- links = 0
- newmsg = 0
- for anchor in anchors:
- href = anchor.prop("href")
- if href is None or href[0:3] != "msg":
- continue
- try:
- links = links + 1
-
- msg = libxml2.buildURI(href, url)
- title = anchor.content
- if title is not None and title[0:4] == 'Re: ':
- title = title[4:]
- if title is not None and title[0:6] == '[xml] ':
- title = title[6:]
- newmsg = newmsg + scanXMLMsgArchive(msg, title, force)
-
- except:
- pass
-
- return newmsg
-
-
-#########################################################################
-# #
-# Main code: open the DB, the API XML and analyze it #
-# #
-#########################################################################
-def analyzeArchives(t=None, force=0):
- global wordsDictArchive
-
- ret = scanXMLDateArchive(t, force)
- print "Indexed %d words in %d archive pages" % (len(wordsDictArchive),
ret)
-
- i = 0
- skipped = 0
- for word in wordsDictArchive.keys():
- refs = wordsDictArchive[word]
- if refs is None:
- skipped = skipped + 1
- continue
- for id in refs.keys():
- relevance = refs[id]
- updateWordArchive(word, id, relevance)
- i = i + 1
-
- print "Found %d associations in HTML pages" % (i)
-
-def analyzeHTMLTop():
- global wordsDictHTML
-
- ret = analyzeHTMLPages()
- print "Indexed %d words in %d HTML pages" % (len(wordsDictHTML), ret)
-
- i = 0
- skipped = 0
- for word in wordsDictHTML.keys():
- refs = wordsDictHTML[word]
- if refs is None:
- skipped = skipped + 1
- continue
- for resource in refs.keys():
- (relevance, id, section) = refs[resource]
- updateWordHTML(word, resource, section, id, relevance)
- i = i + 1
-
- print "Found %d associations in HTML pages" % (i)
-
-def analyzeAPITop():
- global wordsDict
- global API
-
- try:
- doc = loadAPI(API)
- ret = analyzeAPI(doc)
- print "Analyzed %d blocs" % (ret)
- doc.freeDoc()
- except:
- print "Failed to parse and analyze %s" % (API)
- print sys.exc_type, sys.exc_value
- sys.exit(1)
-
- print "Indexed %d words" % (len(wordsDict))
- i = 0
- skipped = 0
- for word in wordsDict.keys():
- refs = wordsDict[word]
- if refs is None:
- skipped = skipped + 1
- continue
- for (module, symbol) in refs.keys():
- updateWord(word, symbol, refs[(module, symbol)])
- i = i + 1
-
- print "Found %d associations, skipped %d words" % (i, skipped)
-
-def usage():
- print "Usage index.py [--force] [--archive] [--archive-year year]
[--archive-month month] [--API] [--docs]"
- sys.exit(1)
-
-def main():
- try:
- openMySQL()
- except:
- print "Failed to open the database"
- print sys.exc_type, sys.exc_value
- sys.exit(1)
-
- args = sys.argv[1:]
- force = 0
- if args:
- i = 0
- while i < len(args):
- if args[i] == '--force':
- force = 1
- elif args[i] == '--archive':
- analyzeArchives(None, force)
- elif args[i] == '--archive-year':
- i = i + 1
- year = args[i]
- months = ["January", "February", "March",
"April", "May",
- "June", "July", "August",
"September", "October",
- "November", "December"]
- for month in months:
- try:
- str = "%s-%s" % (year, month)
- T = time.strptime(str, "%Y-%B")
- t = time.mktime(T) + 3600 * 24 * 10
- analyzeArchives(t, force)
- except:
- print "Failed to index month archive:"
- print sys.exc_type, sys.exc_value
- elif args[i] == '--archive-month':
- i = i + 1
- month = args[i]
- try:
- T = time.strptime(month, "%Y-%B")
- t = time.mktime(T) + 3600 * 24 * 10
- analyzeArchives(t, force)
- except:
- print "Failed to index month archive:"
- print sys.exc_type, sys.exc_value
- elif args[i] == '--API':
- analyzeAPITop()
- elif args[i] == '--docs':
- analyzeHTMLTop()
- else:
- usage()
- i = i + 1
- else:
- usage()
-
-if __name__ == "__main__":
- main()
--
2.21.0