的Python应该返回1个多参数

问题描述:

你好,我是在Python初学者,我想执行这个程序来创建一个集合文件的倒排索引:的Python应该返回1个多参数

import sys 
import re 
from porterStemmer import PorterStemmer 
from collections import defaultdict 
from array import array 
import gc 

porter=PorterStemmer() 

class CreateIndex: 

def __init__(self): 
    self.index=defaultdict(list) #the inverted index 


def getStopwords(self): 
    '''get stopwords from the stopwords file''' 
    f=open(self.stopwordsFile, 'r') 
    stopwords=[line.rstrip() for line in f] 
    self.sw=dict.fromkeys(stopwords) 
    f.close() 


def getTerms(self, line): 
    '''given a stream of text, get the terms from the text''' 
    line=line.lower() 
    line=re.sub(r'[^a-z0-9 ]',' ',line) #put spaces instead of non-alphanumeric characters 
    line=line.split() 
    line=[x for x in line if x not in self.sw] #eliminate the stopwords 
    line=[ porter.stem(word, 0, len(word)-1) for word in line] 
    return line 


def parseCollection(self): 
    ''' returns the id, title and text of the next page in the collection ''' 
    doc=[] 
    for line in self.collFile: 
     if line=='</page>\n': 
      break 
     doc.append(line) 

    curPage=''.join(doc) 
    pageid=re.search('<id>(.*?)</id>', curPage, re.DOTALL) 
    pagetitle=re.search('<title>(.*?)</title>', curPage, re.DOTALL) 
    pagetext=re.search('<text>(.*?)</text>', curPage, re.DOTALL) 

    if pageid==None or pagetitle==None or pagetext==None: 
     return {} 

    d={} 
    d['id']=pageid.group(1) 
    d['title']=pagetitle.group(1) 
    d['text']=pagetext.group(1) 

    return d 


def writeIndexToFile(self): 
    '''write the inverted index to the file''' 
    f=open(self.indexFile, 'w') 
    for term in self.index.iterkeys(): 
     postinglist=[] 
     for p in self.index[term]: 
      docID=p[0] 
      positions=p[1] 
      postinglist.append(':'.join([str(docID) ,','.join(map(str,positions))])) 
     print >> f, ''.join((term,'|',';'.join(postinglist))) 

    f.close() 


def getParams(self): 
    '''get the parameters stopwords file, collection file, and the output index file''' 
    param=sys.argv 
    self.stopwordsFile=param[0] 
    self.collectionFile=param[1] 
    self.indexFile=param[2] 


def createIndex(self): 
    '''main of the program, creates the index''' 
    self.getParams() 
    self.collFile=open(self.collectionFile,'r') 
    self.getStopwords() 

    #bug in python garbage collector! 
    #appending to list becomes O(N) instead of O(1) as the size grows if gc is enabled. 
    gc.disable() 

    pagedict={} 
    pagedict=self.parseCollection() 
    #main loop creating the index 
    while pagedict != {}:      
     lines='\n'.join((pagedict['title'],pagedict['text'])) 
     pageid=int(pagedict['id']) 
     terms=self.getTerms(lines) 

     #build the index for the current page 
     termdictPage={} 
     for position, term in enumerate(terms): 
      try: 
       termdictPage[term][1].append(position) 
      except: 
       termdictPage[term]=[pageid, array('I',[position])] 

     #merge the current page index with the main index 
     for termpage, postingpage in termdictPage.iteritems(): 
      self.index[termpage].append(postingpage) 

     pagedict=self.parseCollection() 


    gc.enable() 

    self.writeIndexToFile() 


if __name__=="__main__": 
c=CreateIndex() 
c.createIndex() 

,并说,这是唯一的存在1参数在sys.argv ...

其他参数应该如何显示?

+0

使用调用函数'蟒蛇my_program.py参数1 argument2' –

+1

根据你的'高清getParams(个体经营)'方法,你的程序需要在命令行中得到的参数。在asknig之前尝试理解它(并阅读sys.argv文档)以完成您的工作... –

getParams函数中,可以看到你的代码请求3个参数。 当你打电话给你的程序时:

python your_program.py 
# sys.argv[0] = 'your_program.py' 

它有1个参数。因此,你需要两个:

python your_program.py arg_1 arg_2 
# sys.argv[0] = 'your_program.py' 
# sys.argv[1] = 'arg_1' 
# sys.argv[2] = 'arg_2