diff --git a/Cargo.lock b/Cargo.lock index 8fda27dc82..39858cc62d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1529,6 +1529,7 @@ dependencies = [ "clearscreen", "colored", "fern", + "lazy_static", "log", "notify", "pyo3", diff --git a/Cargo.toml b/Cargo.toml index eed3799363..1bcdad13cc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ clap = { version = "3.2.16", features = ["derive"] } clearscreen = { version = "1.0.10" } colored = { version = "2.0.0" } fern = { version = "0.6.1" } +lazy_static = { version = "1.4.0" } log = { version = "0.4.17" } notify = { version = "4.0.17" } pyo3 = { version = "0.16.5", features = ["extension-module", "abi3-py37"] } diff --git a/resources/test/src/baz/__init__.py b/resources/test/src/baz/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/resources/test/src/baz/foo/__init__.py b/resources/test/src/baz/foo/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/resources/test/src/baz/foo/if_tuple.py b/resources/test/src/baz/foo/if_tuple.py new file mode 100644 index 0000000000..4ead2534ea --- /dev/null +++ b/resources/test/src/baz/foo/if_tuple.py @@ -0,0 +1,18 @@ +import os +from multiprocessing import pool as fool +from spr_platform import module +from spr_platform.sub_module import other +import spr_platform.sub_module + +if (1, 2): + pass + +for _ in range(5): + if True: + pass + elif (3, 4): + pass + + +def f(x): + pass \ No newline at end of file diff --git a/resources/test/src/duplicate_argument_name.py b/resources/test/src/duplicate_argument_name.py index b866b4938f..65cffa6b19 100644 --- a/resources/test/src/duplicate_argument_name.py +++ b/resources/test/src/duplicate_argument_name.py @@ -1,3 +1,6 @@ +from baz.foo.if_tuple import f + + def foo(x: int, y: int, x: int) -> None: pass diff --git a/resources/test/src/if_tuple.py b/resources/test/src/if_tuple.py index a68044a651..c56439d9ef 100644 --- a/resources/test/src/if_tuple.py +++ b/resources/test/src/if_tuple.py @@ -1,5 +1,6 @@ import os from multiprocessing import pool as fool +import os.path from spr_platform import module from spr_platform.sub_module import other import spr_platform.sub_module diff --git a/resources/test/src/line_too_long.py b/resources/test/src/line_too_long.py index 337d845359..f3b930c97b 100644 --- a/resources/test/src/line_too_long.py +++ b/resources/test/src/line_too_long.py @@ -2,3 +2,5 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. """ + +from baz.foo import if_tuple diff --git a/src/bin/generate_graph.rs b/src/bin/generate_graph.rs index bf982f5620..29be4e0c24 100644 --- a/src/bin/generate_graph.rs +++ b/src/bin/generate_graph.rs @@ -1,20 +1,344 @@ -use std::path::PathBuf; - use anyhow::Result; +use clap::{Parser, ValueHint}; +use lazy_static::lazy_static; +use log::debug; use rustpython_parser::ast::{Stmt, StmtKind, Suite}; use rustpython_parser::parser; +use std::collections::HashSet; +use std::path::{Path, PathBuf}; +use walkdir::DirEntry; use ::rust_python_linter::fs; +use ::rust_python_linter::logging::set_up_logging; use ::rust_python_linter::visitor::{walk_stmt, Visitor}; +#[derive(Debug, Parser)] +struct Cli { + #[clap(parse(from_os_str), value_hint = ValueHint::DirPath, required = true)] + files: Vec, + #[clap(short, long, action)] + verbose: bool, +} + +lazy_static! { + // The output of: `sys.stdlib_module_names` (on Python 3.10.2) + static ref STDLIB_MODULE_NAMES: HashSet<&'static str> = vec![ + "__future__", + "_abc", + "_aix_support", + "_ast", + "_asyncio", + "_bisect", + "_blake2", + "_bootsubprocess", + "_bz2", + "_codecs", + "_codecs_cn", + "_codecs_hk", + "_codecs_iso2022", + "_codecs_jp", + "_codecs_kr", + "_codecs_tw", + "_collections", + "_collections_abc", + "_compat_pickle", + "_compression", + "_contextvars", + "_crypt", + "_csv", + "_ctypes", + "_curses", + "_curses_panel", + "_datetime", + "_dbm", + "_decimal", + "_elementtree", + "_frozen_importlib", + "_frozen_importlib_external", + "_functools", + "_gdbm", + "_hashlib", + "_heapq", + "_imp", + "_io", + "_json", + "_locale", + "_lsprof", + "_lzma", + "_markupbase", + "_md5", + "_msi", + "_multibytecodec", + "_multiprocessing", + "_opcode", + "_operator", + "_osx_support", + "_overlapped", + "_pickle", + "_posixshmem", + "_posixsubprocess", + "_py_abc", + "_pydecimal", + "_pyio", + "_queue", + "_random", + "_scproxy", + "_sha1", + "_sha256", + "_sha3", + "_sha512", + "_signal", + "_sitebuiltins", + "_socket", + "_sqlite3", + "_sre", + "_ssl", + "_stat", + "_statistics", + "_string", + "_strptime", + "_struct", + "_symtable", + "_thread", + "_threading_local", + "_tkinter", + "_tracemalloc", + "_uuid", + "_warnings", + "_weakref", + "_weakrefset", + "_winapi", + "_zoneinfo", + "abc", + "aifc", + "antigravity", + "argparse", + "array", + "ast", + "asynchat", + "asyncio", + "asyncore", + "atexit", + "audioop", + "base64", + "bdb", + "binascii", + "binhex", + "bisect", + "builtins", + "bz2", + "cProfile", + "calendar", + "cgi", + "cgitb", + "chunk", + "cmath", + "cmd", + "code", + "codecs", + "codeop", + "collections", + "colorsys", + "compileall", + "concurrent", + "configparser", + "contextlib", + "contextvars", + "copy", + "copyreg", + "crypt", + "csv", + "ctypes", + "curses", + "dataclasses", + "datetime", + "dbm", + "decimal", + "difflib", + "dis", + "distutils", + "doctest", + "email", + "encodings", + "ensurepip", + "enum", + "errno", + "faulthandler", + "fcntl", + "filecmp", + "fileinput", + "fnmatch", + "fractions", + "ftplib", + "functools", + "gc", + "genericpath", + "getopt", + "getpass", + "gettext", + "glob", + "graphlib", + "grp", + "gzip", + "hashlib", + "heapq", + "hmac", + "html", + "http", + "idlelib", + "imaplib", + "imghdr", + "imp", + "importlib", + "inspect", + "io", + "ipaddress", + "itertools", + "json", + "keyword", + "lib2to3", + "linecache", + "locale", + "logging", + "lzma", + "mailbox", + "mailcap", + "marshal", + "math", + "mimetypes", + "mmap", + "modulefinder", + "msilib", + "msvcrt", + "multiprocessing", + "netrc", + "nis", + "nntplib", + "nt", + "ntpath", + "nturl2path", + "numbers", + "opcode", + "operator", + "optparse", + "os", + "ossaudiodev", + "pathlib", + "pdb", + "pickle", + "pickletools", + "pipes", + "pkgutil", + "platform", + "plistlib", + "poplib", + "posix", + "posixpath", + "pprint", + "profile", + "pstats", + "pty", + "pwd", + "py_compile", + "pyclbr", + "pydoc", + "pydoc_data", + "pyexpat", + "queue", + "quopri", + "random", + "re", + "readline", + "reprlib", + "resource", + "rlcompleter", + "runpy", + "sched", + "secrets", + "select", + "selectors", + "shelve", + "shlex", + "shutil", + "signal", + "site", + "smtpd", + "smtplib", + "sndhdr", + "socket", + "socketserver", + "spwd", + "sqlite3", + "sre_compile", + "sre_constants", + "sre_parse", + "ssl", + "stat", + "statistics", + "string", + "stringprep", + "struct", + "subprocess", + "sunau", + "symtable", + "sys", + "sysconfig", + "syslog", + "tabnanny", + "tarfile", + "telnetlib", + "tempfile", + "termios", + "textwrap", + "this", + "threading", + "time", + "timeit", + "tkinter", + "token", + "tokenize", + "trace", + "traceback", + "tracemalloc", + "tty", + "turtle", + "turtledemo", + "types", + "typing", + "unicodedata", + "unittest", + "urllib", + "uu", + "uuid", + "venv", + "warnings", + "wave", + "weakref", + "webbrowser", + "winreg", + "winsound", + "wsgiref", + "xdrlib", + "xml", + "xmlrpc", + "zipapp", + "zipfile", + "zipimport", + "zlib", + "zoneinfo", + ] + .into_iter() + .collect(); +} + #[allow(dead_code)] #[derive(Debug)] struct ModuleImport { - module_name: Option, + module_name: String, remote_name: Option, local_name: Option, lineno: usize, - pragma: usize, + level: usize, } #[derive(Default)] @@ -22,20 +346,20 @@ struct ImportVisitor { imports: Vec, } -// Inspired by: https://github.com/blais/snakefood/blob/f902c9a099f7c5bb75154a747bf098259211025d/lib/python/snakefood/find.py#L241 +/// https://github.com/blais/snakefood/blob/f902c9a099f7c5bb75154a747bf098259211025d/lib/python/snakefood/find.py#L130 impl Visitor for ImportVisitor { fn visit_stmt(&mut self, stmt: &Stmt) { match &stmt.node { StmtKind::Import { names } => { for alias in names { self.imports.push(ModuleImport { - module_name: Some(alias.name.clone()), + module_name: alias.name.clone(), remote_name: None, local_name: Some( alias.asname.clone().unwrap_or_else(|| alias.name.clone()), ), lineno: stmt.location.row(), - pragma: 0, + level: 0, }) } } @@ -48,27 +372,26 @@ impl Visitor for ImportVisitor { if module_name == "__future__" { return; } - } - - for alias in names { - if alias.name == "*" { - self.imports.push(ModuleImport { - module_name: module.clone(), - remote_name: None, - local_name: None, - lineno: stmt.location.row(), - pragma: *level, - }) - } else { - self.imports.push(ModuleImport { - module_name: module.clone(), - remote_name: Some(alias.name.clone()), - local_name: Some( - alias.asname.clone().unwrap_or_else(|| alias.name.clone()), - ), - lineno: stmt.location.row(), - pragma: *level, - }) + for alias in names { + if alias.name == "*" { + self.imports.push(ModuleImport { + module_name: module_name.clone(), + remote_name: None, + local_name: None, + lineno: stmt.location.row(), + level: *level, + }) + } else { + self.imports.push(ModuleImport { + module_name: module_name.clone(), + remote_name: Some(alias.name.clone()), + local_name: Some( + alias.asname.clone().unwrap_or_else(|| alias.name.clone()), + ), + lineno: stmt.location.row(), + level: *level, + }) + } } } } @@ -89,10 +412,141 @@ fn collect_imports(python_ast: &Suite) -> Vec { .collect() } +/// https://github.com/blais/snakefood/blob/f902c9a099f7c5bb75154a747bf098259211025d/lib/python/snakefood/find.py#L315 +fn find_dotted_module( + module_name: &str, + remote_name: &Option, + source_roots: &[PathBuf], + level: &usize, +) -> Result> { + // Check for standard library and built-in imports. + if STDLIB_MODULE_NAMES.contains(module_name.split('.').next().unwrap()) { + debug!("Skipping standard library import: {module_name}"); + return Ok(None); + } + + for source_root in source_roots { + // Handle relative imports. + let mut parent_dir: &Path = source_root; + for _ in 0..*level { + parent_dir = parent_dir + .parent() + .ok_or(anyhow::anyhow!("Unable to find parent directory."))?; + } + + let names: Vec<&str> = module_name.split('.').into_iter().collect(); + + // If we can't find the imported module, return None. + // This could be a third-party library, part of the standard library, etc. + match find_dotted(&names, parent_dir)? { + Some(filename) => { + // If this is an `import from`, check if the target symbol is itself a module. + if let Some(remote_name) = remote_name { + if let Some(filename) = find_dotted( + &[remote_name], + &filename + .parent() + .ok_or(anyhow::anyhow!("Unable to find parent directory."))?, + )? { + return Ok(Some(filename)); + } + } + + return Ok(Some(filename)); + } + None => {} + } + } + + // Most likely a third-party module. + debug!("Unable to find import for: {module_name}"); + Ok(None) +} + +/// https://github.com/python/cpython/blob/main/Lib/imp.py#L255 +fn find_module(name: &str, parent_dir: &Path) -> Option { + let package_directory = parent_dir.join(name); + let package_file_name = "__init__.py"; + let file_path = package_directory.join(package_file_name); + if file_path.exists() { + return Some(file_path); + } + + let file_name = format!("{}.py", name); + let file_path = parent_dir.join(file_name); + if file_path.exists() { + return Some(file_path); + } + + None +} + +/// https://github.com/blais/snakefood/blob/f902c9a099f7c5bb75154a747bf098259211025d/lib/python/snakefood/find.py#L371 +fn find_dotted(names: &[&str], parent_dir: &Path) -> Result> { + let mut filename: Option = None; + + for name in names { + let dirname: PathBuf = if let Some(filename) = filename { + filename + .clone() + .parent() + .ok_or(anyhow::anyhow!("Unable to find parent directory."))? + .to_path_buf() + } else { + parent_dir.to_path_buf() + }; + match find_module(name, dirname.as_path()) { + Some(module) => { + filename = Some(module.clone()); + } + None => return Ok(None), + } + } + + Ok(filename) +} + +/// https://github.com/blais/snakefood/blob/f902c9a099f7c5bb75154a747bf098259211025d/lib/python/snakefood/find.py#L30 +fn map_imports_to_files( + imports: &[ModuleImport], + source_roots: &[PathBuf], +) -> Result> { + let mut files: Vec = vec![]; + let mut seen: HashSet<(&String, &Option)> = HashSet::new(); + for import in imports { + let sig = (&import.module_name, &import.remote_name); + if seen.contains(&sig) { + continue; + } + seen.insert(sig); + + match find_dotted_module( + &import.module_name, + &import.remote_name, + source_roots, + &import.level, + )? { + Some(module_filename) => files.push(std::fs::canonicalize(module_filename)?), + None => {} + } + } + + Ok(files) +} + fn main() -> Result<()> { - // What else is required here? Map from modules to files. - let files = fs::iter_python_files(&PathBuf::from("resources/test/src")); - for entry in files { + let cli = Cli::parse(); + + set_up_logging(cli.verbose)?; + + let source_roots: &[PathBuf] = &cli.files; + let files: Vec = source_roots + .iter() + .flat_map(fs::iter_python_files) + .collect(); + for entry in files.iter().take(10) { + println!("--- {} ---", entry.path().to_string_lossy()); + // Read the file from disk. let contents = fs::read_file(entry.path())?; @@ -101,9 +555,20 @@ fn main() -> Result<()> { // Collect imports. let imports = collect_imports(&python_ast); - for import in imports { + for import in &imports { println!("{} imports: {:?}", entry.path().to_string_lossy(), import) } + + // Map from imports to first-party dependencies. + let dependencies = map_imports_to_files(&imports, source_roots)?; + for dependency in dependencies { + println!( + "{} depends on: {:?}", + entry.path().to_string_lossy(), + dependency.to_string_lossy(), + ) + } } + Ok(()) }