https://github.com/python/cpython
Revision 7a325c385bb547b481abfee8301e9d9f2e58990a authored by cvs2svn on 06 May 1994, 14:16:55 UTC, committed by cvs2svn on 06 May 1994, 14:16:55 UTC
1 parent dc0493a
Raw File
Tip revision: 7a325c385bb547b481abfee8301e9d9f2e58990a authored by cvs2svn on 06 May 1994, 14:16:55 UTC
This commit was manufactured by cvs2svn to create tag 'release102'.
Tip revision: 7a325c3
find.py
# A parallelized "find(1)" using the thread module (SGI only for now).

# This demonstrates the use of a work queue and worker threads.
# It really does do more stats/sec when using multiple threads,
# although the improvement is only about 20-30 percent.

# I'm too lazy to write a command line parser for the full find(1)
# command line syntax, so the predicate it searches for is wired-in,
# see function selector() below.  (It currently searches for files with
# group or world write permission.)

# Usage: find.py [-w nworkers] [directory] ...
# Default nworkers is 4, maximum appears to be 8 (on Irix 4.0.2)


import sys
import getopt
import string
import time
import os
from stat import *
import thread


# Work queue class.  Usage:
#   wq = WorkQ()
#   wq.addwork(func, (arg1, arg2, ...)) # one or more calls
#   wq.run(nworkers)
# The work is done when wq.run() completes.
# The function calls executed by the workers may add more work.
# Don't use keyboard interrupts!

class WorkQ:

	# A queue of (func, args) jobs that is drained by a pool of
	# worker threads.  Jobs may add more jobs while they run.
	#
	# Invariants:
	#
	# - busy, work and the release of todo are only touched while
	#   mutex is held (the blocking todo.acquire() calls in
	#   _getwork() and run() are the only lock operations made
	#   without it)
	# - len(work) is the number of jobs ready to be taken
	# - busy is the number of jobs being done
	# - todo is locked iff there is no work and somebody is busy
	#   (it is also locked while the queue is idle: before the
	#   first addwork() and after run() has returned normally)

	def __init__(self):
		# mutex guards work and busy.  todo is what idle workers
		# block on; it starts out locked because there is no work.
		self.mutex = thread.allocate_lock()
		self.todo = thread.allocate_lock()
		self.todo.acquire()
		self.work = []
		self.busy = 0

	def addwork(self, job, args=None):
		# Queue one job.  Both calling forms are accepted:
		#   addwork(func, (arg1, arg2, ...))
		#   addwork((func, (arg1, arg2, ...)))
		# Raises TypeError when the job is null.
		if not job:
			raise TypeError('cannot add null job')
		if args is not None:
			job = (job, args)
		self.mutex.acquire()
		try:
			self.work.append(job)
			# The queue just went from empty to non-empty, so
			# todo is locked: unlock it to wake one waiter.
			# Deciding this while mutex is held keeps two
			# threads from both (or neither) releasing todo.
			if len(self.work) == 1:
				self.todo.release()
		finally:
			self.mutex.release()

	def _getwork(self):
		# Block until a job can be taken or everything is finished.
		# Returns the (func, args) job, or None when the queue is
		# empty and no job is running any more.
		self.todo.acquire()
		self.mutex.acquire()
		try:
			if self.busy == 0 and not self.work:
				# All done: pass the signal on so that the
				# next waiter sees the same thing.
				self.todo.release()
				return None
			job = self.work[0]
			del self.work[0]
			self.busy = self.busy + 1
			# More jobs are ready: let another worker in.
			if self.work:
				self.todo.release()
			return job
		finally:
			self.mutex.release()

	def _donework(self):
		# Record that one job has finished; when it was the last
		# one and nothing is queued, unlock todo so that waiting
		# workers can find out that the work is complete.
		self.mutex.acquire()
		try:
			self.busy = self.busy - 1
			if self.busy == 0 and not self.work:
				self.todo.release()
		finally:
			self.mutex.release()

	def _worker(self):
		# Worker loop: take jobs and call them until _getwork()
		# reports that there is nothing left to do.
		while 1:
			job = self._getwork()
			if not job:
				break
			func, args = job
			try:
				func(*args)
			finally:
				# Always account for the job, even when it
				# raised; otherwise busy never drops back to 0
				# and the other workers wait forever.
				self._donework()

	def run(self, nworkers):
		# Process all queued jobs using nworkers workers: the
		# calling thread plus nworkers-1 new threads.  Returns once
		# the queue is empty and no job is running.
		if not self.work:
			return # Nothing to do
		for i in range(nworkers-1):
			thread.start_new_thread(self._worker, ())
		self._worker()
		# Lock todo again so the queue is back in its idle state.
		# Helper threads that are still blocked in _getwork() at
		# this point may be left waiting on todo.
		self.todo.acquire()


# Main program

def main():
	# Parse the command line, queue one find() job per starting
	# directory (the current directory when none is given), run the
	# workers, and write the elapsed time to stderr.
	nworkers = 4
	# The option spec lists option letters only, without a leading
	# '-'; 'w:' means that -w takes an argument.
	opts, args = getopt.getopt(sys.argv[1:], 'w:')
	for opt, arg in opts:
		if opt == '-w':
			nworkers = int(arg)
	if not args:
		args = [os.curdir]

	wq = WorkQ()
	for dirname in args:
		wq.addwork(find, (dirname, selector, wq))

	t1 = time.time()
	wq.run(nworkers)
	t2 = time.time()

	sys.stderr.write('Total time ' + repr(t2-t1) + ' sec.\n')


# The predicate -- defines what files we look for.
# Feel free to change this to suit your purpose

def selector(dir, name, fullname, stat):
	# Look for group or world writable files.
	# dir, name and fullname are part of the predicate interface
	# but are not used by this particular predicate; stat is the
	# stat result for the file, indexed with ST_MODE.
	# Returns true when either write bit is set in the mode.
	return (stat[ST_MODE] & (S_IWGRP | S_IWOTH)) != 0


# The find procedure -- calls wq.addwork() for subdirectories

def find(dir, pred, wq):
	# Scan a single directory.  Every entry for which
	# pred(dir, name, fullname, stat) is true has its full name
	# printed, and for every subdirectory that is not a mount point
	# a new find() job is added to wq.  Errors from listing the
	# directory or stat'ing an entry are printed and the directory
	# or entry is skipped.  Each message is built as one string so
	# that it goes out in a single print.
	try:
		names = os.listdir(dir)
	except os.error as msg:
		print(repr(dir) + ' : ' + str(msg))
		return
	for name in names:
		if name in (os.curdir, os.pardir):
			continue
		fullname = os.path.join(dir, name)
		try:
			# lstat() does not follow symbolic links, so a link
			# to a directory is not treated as a directory below.
			st = os.lstat(fullname)
		except os.error as msg:
			print(repr(fullname) + ' : ' + str(msg))
			continue
		if pred(dir, name, fullname, st):
			print(fullname)
		if S_ISDIR(st[ST_MODE]) and not os.path.ismount(fullname):
			wq.addwork(find, (fullname, pred, wq))


# Call the main program

# Only run when executed as a script, so that importing this file
# does not start a search.
if __name__ == '__main__':
	main()
back to top