Changeset 1757


Ignore:
Timestamp:
Oct 23, 2009 12:33:58 PM (11 years ago)
Author:
jwatson
Message:

Added preliminary checkpointing of PH state via Python's pickling mechanism. Simultaneously slick and scary. This commit only deals with writing the state; restoration comes in a subsequent commit.

Location:
trunk/coopr/pysp
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/coopr/pysp/ph.py

    r1756 r1757  
    103103
    104104   #
    105    # placeholder for checkpointing via pickleing
    106    #
    107 
    108    def checkpoint(self):
    109 
    110       # TBD - probably want to input iteration to mark the checkpoint filename with.
    111       checkpoint_filename = "checkpoint.test"
    112       print "STARTING TO CHECKPOINT"
    113       # TBD - there are some issues with solvers, solver managers, and plugins. and we
    114       #       probably don't want to store those persistently anyway.
    115       # TBD - create temporaries for the following three attributes, and re-assign at the end.
     105   # checkpoint the current PH state via pickling. the input iteration count
     106   # simply serves as a tag to create the output file name. everything with the
     107   # exception of the _ph_plugin, _solver_manager, and _solver attributes is
     108   # pickled. currently, plugins fail in the pickle process, which is fine as
     109   # JPW doesn't think you want to pickle plugins (particularly the solver and
     110   # solver manager) anyway. For example, you might want to change those later,
     111   # after restoration - and the PH state is independent of how scenario
     112   # sub-problems are solved.
     113   #
     114
     115   def checkpoint(self, iteration_count):
     116
     117      checkpoint_filename = "checkpoint."+str(iteration_count)
     118
     119      tmp_ph_plugin = self._ph_plugin
     120      tmp_solver_manager = self._solver_manager
     121      tmp_solver = self._solver
     122
    116123      self._ph_plugin = None
    117124      self._solver_manager = None
    118       self._solver = None     
     125      self._solver = None
     126     
    119127      checkpoint_file = open(checkpoint_filename, "w")
    120128      pickle.dump(self,checkpoint_file)
    121129      checkpoint_file.close()
    122       # TBD - restore the solvers/plugins
    123       print "CHECKPOINT WRITTEN TO FILE="+checkpoint_filename
     130
     131      self._ph_plugin = tmp_ph_plugin
     132      self._solver_manager = tmp_solver_manager
     133      self._solver = tmp_solver
     134
     135      print "Checkpoint written to file="+checkpoint_filename
    124136   
    125137   #
     
    182194          verbose               does the PH object stream debug/status output? defaults to False.
    183195          output_times          do I output timing statistics? defaults to False (e.g., useful in the case where you want to regression test against baseline output).
     196          checkpoint_interval   how many iterations between writing a checkpoint file containing the entire PH state? defaults to 0, indicating never.
    184197
    185198   """
     
    217230      # PH history
    218231      self._solutions = {}
     232
     233      # the checkpoint interval - expensive operation, but worth it for big models.
     234      # 0 indicates don't checkpoint.
     235      self._checkpoint_interval = 0
    219236
    220237      # all information related to the scenario tree (implicit and explicit).
     
    279296         elif key == "retain_quadratic_binary_terms":
    280297            self._retain_quadratic_binary_terms = kwds[key]
     298         elif key == "checkpoint_interval":
     299            self._checkpoint_interval = kwds[key]           
    281300         else:
    282301            print "Unknown option=" + key + " specified in call to PH constructor"
     
    292311         if os.path.exists(self._rho_setter) is False:
    293312            raise ValueError, "The rho setter script file="+self._rho_setter+" does not exist"
     313
     314      # validate the checkpoint interval.
     315      if self._checkpoint_interval < 0:
     316         raise ValueError, "A negative checkpoint interval with value="+str(self._checkpoint_interval)+" was specified in call to PH constructor"
    294317
    295318      # construct the sub-problem solver.
     
    325348         print "   Output solver log? " + str(self._output_solver_log)
    326349         print "   Output times? " + str(self._output_times)
     350         print "   Checkpoint interval="+str(self._checkpoint_interval)
    327351
    328352   """ Initialize PH with model and scenario data, in preparation for solve().
     
    954978         self._ph_plugin.service().post_iteration_0(self)
    955979
     980      # checkpoint if it's time - which it always is after iteration 0,
     981      # if the interval is >= 1!
     982      if (self._checkpoint_interval > 0):
     983         self.checkpoint(0)
     984
    956985      # there is an upper bound on the number of iterations to execute -
    957986      # the actual bound depends on the converger supplied by the user.
     
    9751004            self.pprint(False,False,True,False)
    9761005
     1006         # we don't technically have to do this at the last iteration,
     1007         # but with checkpointing and re-starts, you're never sure
     1008         # when you're executing the last iteration.
     1009         self.update_weights()
     1010
    9771011         # let plugins know if they care.
    9781012         if len(self._ph_plugin) == 1:
     
    9841018         if self._verbose is True:
    9851019            print "Number of discrete variables fixed=",self._total_fixed_discrete_vars," (total=",self._total_discrete_vars,")"
    986             print "Number of continuous variables fixed=",self._total_fixed_continuous_vars," (total=",self._total_continuous_vars,")"           
    987 
     1020            print "Number of continuous variables fixed=",self._total_fixed_continuous_vars," (total=",self._total_continuous_vars,")"
     1021
     1022         # let plugins know if they care.
     1023         if len(self._ph_plugin) == 1:
     1024            self._ph_plugin.service().post_iteration_k(self)
     1025
     1026         # at this point, all the real work of an iteration is complete.
     1027
     1028         # checkpoint if it's time.
     1029         if (self._checkpoint_interval > 0) and (i % self._checkpoint_interval is 0):
     1030            self.checkpoint(i)
     1031
     1032         # check for early termination.
    9881033         self._converger.update(self._current_iteration, self, self._scenario_tree, self._instances)
    9891034         print "Convergence metric=%12.4f" % self._converger.lastMetric()
     
    9951040               print "PH converged - convergence metric is below threshold="+str(self._converger._convergence_threshold)+" or all discrete variables are fixed"               
    9961041            break
    997 
    998          # TBD - probably want to always update the weights in light of checkpointing
    999          if i != self._max_iterations:
    1000             self.update_weights()
    1001 
    1002          # let plugins know if they care.
    1003          if len(self._ph_plugin) == 1:
    1004             self._ph_plugin.service().post_iteration_k(self)
    10051042
    10061043         # if we're terminating due to exceeding the maximum iteration count, print a message
  • trunk/coopr/pysp/ph_script.py

    r1743 r1757  
    184184                  dest="retain_quadratic_binary_terms",
    185185                  default=False)
     186parser.add_option("--checkpoint-interval",
     187                  help="The number of iterations between writing of a checkpoint file. Default is 0, indicating never.",
     188                  action="store",
     189                  dest="checkpoint_interval",
     190                  type="int",
     191                  default=0)
    186192parser.add_option("--profile",
    187193                  help="Enable profiling of Python code.  The value of this option is the number of functions that are summarized.",
     
    340346                           output_times=options.output_times, \
    341347                           disable_warmstarts=options.disable_warmstarts,
    342                            retain_quadratic_binary_terms=options.retain_quadratic_binary_terms)
     348                           retain_quadratic_binary_terms=options.retain_quadratic_binary_terms,
     349                           checkpoint_interval=options.checkpoint_interval)
    343350   
    344351   ph.initialize(scenario_data_directory_name=options.instance_directory, \
Note: See TracChangeset for help on using the changeset viewer.