grep-r.py is a Python script that performs a rudimentary search for regular expressions in files below a directory. (The -r in the file name stands for "recursively".)
In the current version, only files with an .sql suffix are considered. This should of course be handled more flexibly (see TODO below).
Contrary to grep, it does not print the entire line that matched but only the portion of the line that matched the regular expression.
It also differs from grep in that it prints the name of the file containing the match after the matched portion. This behaviour was chosen so that the output of grep-r.py can be piped into a program that sorts its input (such as the shell command sort or PowerShell's Sort-Object).
import re
import os
import sys
# The regular expression to search for is the one mandatory command-line
# argument; without it there is nothing to do.
if len(sys.argv) <= 1:
    print('Expected: regular expression')
    sys.exit(-1)

regexpText = sys.argv[1]
print('regexpText = ' + regexpText)

# Search configuration:
#  - fileSuffix:   suffix (without the leading dot) a file name must end with
#                  in order to be searched.
#  - fileEncoding: encoding with which every searched file is opened.
fileSuffix = 'sql'
fileEncoding = 'latin1'
def findPattern(fileName, regexp):
    """Print the matches of ``regexp`` found in the file ``fileName``.

    Files whose path contains ``archiv`` (compared case-insensitively) are
    skipped entirely. Every other file is opened with the module-level
    ``fileEncoding`` and searched line by line; only the first match on a
    line is reported.

    Each reported match is printed as the matched text, left-justified and
    padded to 140 characters, followed by a space and ``fileName``.

    Args:
        fileName: Path of the file to search.
        regexp: Compiled regular expression (anything with a ``search``
            method that returns a match object or ``None``).
    """
    isArchived = 'archiv' in fileName.lower()
    if isArchived:
        return

    with open(fileName, encoding=fileEncoding) as handle:
        # map() is lazy, so the file is still read and searched one line at a time.
        for hit in map(regexp.search, handle):
            if hit is None:
                continue
            print('{:140s} {}'.format(hit.group(0), fileName))
def walkTree(under, regepx):
    """Search every file with the configured suffix below a directory.

    Walks the directory tree rooted at ``under`` with ``os.walk`` and hands
    each file whose name ends in ``'.' + fileSuffix`` (compared
    case-insensitively) to ``findPattern``.

    Args:
        under: Directory at which the walk starts.
        regepx: Compiled regular expression that is passed on to
            ``findPattern``. (The misspelled parameter name is kept so that
            existing callers keep working.)
    """
    # Built once: the suffix is the same for every file in the tree.
    wantedSuffix = '.' + fileSuffix.lower()

    # os.walk yields one tuple per visited directory:
    #
    #  - curDir: a string that contains the path to the «current» directory
    #            (relative if ``under`` is relative)
    #
    #  - _dirs:  a list of strings, each of which is a directory name that is
    #            present in the «current» directory. Not needed here because
    #            os.walk descends into them by itself.
    #
    #  - files:  a list of strings, each of which is a file name that is
    #            present in the «current» directory.
    for curDir, _dirs, files in os.walk(under):
        for file in files:
            if file.lower().endswith(wantedSuffix):
                # Pass on the caller's pattern instead of relying on a
                # module-level variable that merely has a similar name.
                findPattern(os.path.join(curDir, file), regepx)
# Compile the pattern once, case-insensitively, then search everything below
# the current working directory.
regexp = re.compile(regexpText, flags=re.IGNORECASE)
walkTree('.', regexp)