Python编码打开和保存数据到文件

Python编码打开和保存数据到文件

问题描述:

我在让 Python 中的 train 函数正常工作时遇到了问题。我不能修改这些 def 函数的定义。目前我需要读取第二个文件 PosList(以 OpenPos 打开),检查每一行的第一列,并与 movieWordCount[z] 的值进行匹配。如果该值已经在文件中,我只需用一行代码给该行的第 2 列(按空格分隔)加一即可;如果不在,就需要把它追加到文件末尾。但这行不通:值缺失时它不会追加;值存在时,我也不确定它能否找到。我已经被这个问题卡了两天。

这是我正在处理的代码段:

with open("PosList") as OpenPos: 
    lines = OpenPos.readlines() 
    print lines 
    if movieWordCount[z] in lines: 
     print "found" 

    #Now use tokenize to split it apart by space and set to new array for me to call column2 
    else: 
     print "not found" 
     lines.append(movieWordCount[z] + " 1" + "\n") 

这是我的完整代码:

#!/usr/bin/python 

#Import Counter 
import collections 
from collections import Counter 
#Was already here but pickle is used for data input and export 
import math, os, pickle, re 

class Bayes_Classifier: 

def __init__(self, trainDirectory = "movie_reviews/"): 

    #If file listing exists skip to train 
    if os.path.isfile('iFileList'): 
     print "file found" 
     self.train() 
     #self.classify() 

    #If file listing does not exist skip to train 
    if not os.path.isfile('iFileList'): 
     print "no file" 
     newfile = 'iFileList' 
     tempList = set() 
     subDir = './movie_reviews' 
     for filenames in os.listdir(subDir): 
      my_sub_path = os.path.join(os.sep,subDir,filenames) 
      tempList.add(filenames) 
      self.save("filenames", "try3") 
     f = [] 
     for fFileObj in os.walk("movie_reviews/"): 
      f.extend(fFileObj) 
      break 
     pickle.dump(f, open("save.p", "wb")) 
     self.save(f, "try4") 

     with open(newfile, 'wb') as fi: 
      pickle.dump(tempList, fi) 
      #print tempList 

     self.train() 
     #self.classify() 

def train(self):  
    '''Trains the Naive Bayes Sentiment Classifier.''' 
    print "File ready for training" 
    #Open iFileList to use as input for opening movie files 
    x = 0 
    OpenIFileList = open('iFileList','r') 
    print "iFileList now Open" 
    #Loop through the file 
    for line in OpenIFileList: 
     #print "Ready to read lines" 
     #print "reading line " + line 
     if x > 4: 
      if x % 2 == 0: 
       #print line 
       s = line 
       if '-' in s: 
        comp = s.split("'") 
        #print comp[2] 
        print comp[1] #This is What you need for t he movie file 
        compValue1 = comp[1] 
        #Determine Positive/Negative. 
        #compType is the variable I am storing it to. 
        compType = compValue1.split("-",2)[1] 
        #print compType #Prints that middle value like 5 or 1 
        # This will do the work based on the value. 
        if compType == '5': 
        # print "you have a five" #Confirms the loop I am in. 
         #If file does not exists create it 
         if not os.path.exists('PosList'): 
          print "no file" 
          file('PosList', 'w').close() 
         #Open file that needs to be reviewed for word count 
         compValue2 = "movie_reviews/" + compValue1 
         print compValue2 #Prints the directory and file path 
         OpenMovieList = open(compValue2,'r') 
         for commentLine in OpenMovieList: 
          commentPositive = commentLine.split(" ") 
          commentPositiveCounter = Counter(commentPositive) 
          #print commentPositiveCounter # " Comment Pos goes here" 
          #if commentLine != '' or commentLine != ' ': 
          #Get first word, second word, .... 
          if commentLine and (not commentLine.isspace()): 
           movieWordCount = self.tokenize(commentLine) 
           y = len(movieWordCount) #determines length of string 
           print y 
           z = 0 
           #print movieWordCount[0] # Shows the zero position in the file. 
           while z < y: 
            print "position " + str(z) + " word is " + movieWordCount[z] # Shows the word we are at and position id 

            with open("PosList") as OpenPos: 
             lines = OpenPos.readlines() 
             print lines 
             if movieWordCount[z] in lines: 
              print "found" 
             else: 
              print "not found" 
              lines.append(movieWordCount) 


            z = z + 1 

         #Close the files 
         OpenMovieList.close() 
         OpenPos.close() 


     x += 1 
     #for line2 in OpenIFileList.readlines(): 
     #for line in open('myfile','r').readlines(): 
      #do_something(line) 

    #Save results 
    #Close the File List 
    OpenIFileList.close() 



def loadFile(self, sFilename): 
    '''Given a file name, return the contents of the file as a string.''' 

    f = open(sFilename, "r") 
    sTxt = f.read() 
    f.close() 
    return sTxt 

def save(self, dObj, sFilename): 
    '''Given an object and a file name, write the object to the file using pickle.''' 

    f = open(sFilename, "w") 
    p = pickle.Pickler(f) 
    p.dump(dObj) 
    f.close() 

def load(self, sFilename): 
    '''Given a file name, load and return the object stored in the file.''' 

    f = open(sFilename, "r") 
    u = pickle.Unpickler(f) 
    dObj = u.load() 
    f.close() 
    return dObj 

def tokenize(self, sText): 
    '''Given a string of text sText, returns a list of the individual tokens that 
    occur in that string (in order).''' 

    lTokens = [] 
    sToken = "" 
    for c in sText: 
     if re.match("[a-zA-Z0-9]", str(c)) != None or c == "\'" or c == "_" or c == '-': 
      sToken += c 
     else: 
      if sToken != "": 
       lTokens.append(sToken) 
       sToken = "" 
      if c.strip() != "": 
       lTokens.append(str(c.strip())) 

    if sToken != "": 
     lTokens.append(sToken) 

    return lTokens 

要以只写方式打开一个文件,你可以使用:

with open('PosList', 'w') as Open_Pos 

当您使用 with 语句时,不需要手动关闭文件;Python 会在 with 代码块结束时替你关闭它。

所以,假设你向 lines 变量添加数据的方式是正确的,你可以删除多余的 OpenMovieList.close() 和 OpenPos.close(),并在末尾追加两行代码:

with open("PosList") as OpenPos: 
    lines = OpenPos.readlines() 
    print lines 
    if movieWordCount[z] in lines: 
     print "found" 
    else: 
     print "not found" 
     lines.append(movieWordCount) 
with open("PosList", "w") as OpenPos: 
    OpenPos.write(lines) 
+0

我认为你把我带到了正确的方向上,但它仍然无法正常工作。我在之前提供的代码中发现了一些错误,主要是 open 相关的部分: with open("PosList") as OpenPos: lines = OpenPos.readlines() print lines i = 0 while i …

+0

else: print "not found" lines.append(movieWordCount[z] + '1' + "\n") print lines i += 1 with open("PosList", "w") as Open_Pos: Open_Pos.write(lines) –

+0

现在它似乎在同一个循环里无休止地处理单词。我看到它按预期在文件之间跳转,但它不会递增正在查看的单词位置,并且经常在转储成千上万个相同的单词时报告 "not found",而它本应输出按顺序排列的句子。 –