Magento 2 Documentation  2.3
Documentation for Magento 2 CMS v2.3 (December 2018)
WordsFinder.php
Go to the documentation of this file.
1 <?php
11 
13 {
/**
 * File extensions treated as binary. _isBinaryFile() compares a file's
 * extension against this list, and _findWords() does not read the contents
 * of matching files (only their path is searched).
 *
 * @var string[]
 */
19  protected $_binaryExtensions = [
20  'jpg', 'jpeg', 'png', 'gif', 'swf', 'mp3', 'avi', 'mov', 'flv', 'jar', 'zip',
21  'eot', 'ttf', 'woff', 'woff2', 'ico', 'svg',
22  ];
23 
/**
 * Copyright line that _findWords() looks for in file contents when the
 * copyright check is enabled.
 *
 * @var string
 */
29  protected $copyrightString = 'Copyright © Magento, Inc. All rights reserved.';
30 
/**
 * License reference line that _findWords() expects to find shortly after
 * the copyright line.
 *
 * @var string
 */
36  protected $copyingString = 'See COPYING.txt for license details.';
37 
/**
 * File extensions for which isCopyrightCheckSkipped() returns true.
 *
 * @var string[]
 */
43  protected $copyrightSkipExtensions = ['csv', 'json', 'lock', 'md', 'txt'];
44 
/**
 * Path fragments; isCopyrightCheckSkipped() returns true for any path that
 * contains one of them.
 *
 * @var string[]
 */
50  protected $copyrightSkipList = [
51  'lib/web/legacy-build.min.js'
52  ];
53 
// NOTE(review): the constructor assigns $this->isCopyrightChecked, but no declaration
// is visible in this listing; it is presumably on the line below - confirm.
60 
/**
 * Words to search for, collected by _extractWords() from the
 * //config/words/word nodes of every loaded configuration file.
 *
 * @var string[]
 */
66  protected $_words = [];
67 
/**
 * Map of path => list of words permitted under that path. An empty list
 * means every word is permitted there (see _removeWhitelistedWords()).
 *
 * @var array
 */
73  protected $_whitelist = [];
74 
/**
 * Base directory: realpath() of the constructor argument with backslashes
 * replaced by forward slashes.
 *
 * @var string
 */
80  protected $_baseDir;
81 
// NOTE(review): the constructor assigns $this->componentRegistrar, but no declaration
// is visible in this listing; it is presumably on the line below - confirm.
88 
/**
 * Map of path => list of strings that _findWords() removes from the file
 * contents before searching for words.
 *
 * @var array
 */
94  private $exclude = [];
95 
/**
 * Build a finder from one or more XML configuration files.
 *
 * @param string|string[] $configFiles Path(s) of the XML file(s) declaring words and whitelist items
 * @param string $baseDir Existing directory that component-less whitelist paths are relative to
 * @param object $componentRegistrar Object whose getPath($type, $name) is used to resolve component paths
 * @param bool $isCopyrightChecked Whether _findWords() should also verify the copyright notice
 * @throws \Magento\TestFramework\Inspection\Exception When the base directory is missing, a config file
 *                                                     cannot be loaded, or no words were configured
 */
public function __construct($configFiles, $baseDir, $componentRegistrar, $isCopyrightChecked = false)
{
    if (!is_dir($baseDir)) {
        throw new \Magento\TestFramework\Inspection\Exception("Base directory {$baseDir} does not exist");
    }
    $this->componentRegistrar = $componentRegistrar;
    $this->_baseDir = str_replace('\\', '/', realpath($baseDir));

    $configList = is_array($configFiles) ? $configFiles : [$configFiles];

    // First pass: read words and whitelist entries from every file.
    foreach ($configList as $configPath) {
        $this->_loadConfig($configPath);
    }

    // Second pass (deliberately separate): the config files themselves surely contain
    // the banned words, so permit every word in them, overriding anything loaded above.
    foreach ($configList as $configPath) {
        $normalizedConfigPath = str_replace('\\', '/', realpath($configPath));
        $this->_whitelist[$normalizedConfigPath] = [];
    }

    $this->_normalizeWhitelistPaths();

    // A finder without any words would silently pass everything.
    if (!$this->_words) {
        throw new \Magento\TestFramework\Inspection\Exception('No words to check');
    }

    $this->isCopyrightChecked = $isCopyrightChecked;
}
134 
/**
 * Read one XML configuration file and merge its words and whitelist into this finder.
 *
 * @param string $configFile Path of the XML configuration file
 * @return void
 * @throws \Magento\TestFramework\Inspection\Exception When the file is missing or cannot be parsed
 */
protected function _loadConfig($configFile)
{
    if (file_exists($configFile) === false) {
        throw new \Magento\TestFramework\Inspection\Exception("Configuration file {$configFile} does not exist");
    }

    try {
        $rawXml = file_get_contents($configFile);
        $config = new \SimpleXMLElement($rawXml);
    } catch (\Exception $error) {
        // Re-throw as the inspection exception, keeping the original as "previous".
        throw new \Magento\TestFramework\Inspection\Exception(
            $error->getMessage(),
            $error->getCode(),
            $error
        );
    }

    $this->_extractWords($config);
    $this->_extractWhitelist($config);
}
154 
/**
 * Collect the words listed under //config/words/word and add them to the known words.
 *
 * Empty entries are dropped and duplicates are removed.
 *
 * @param \SimpleXMLElement $configXml Parsed configuration
 * @return $this
 */
protected function _extractWords(\SimpleXMLElement $configXml)
{
    $declared = [];
    foreach ($configXml->xpath('//config/words/word') as $wordNode) {
        $text = (string)$wordNode;
        if ($text) {
            $declared[] = $text;
        }
    }

    $this->_words = array_unique(array_merge($this->_words, $declared));

    return $this;
}
175 
/**
 * Collect whitelist items from //config/whitelist/item and merge them into this finder.
 *
 * Each item must contain a "path" node. When the item also has a "component" node
 * (with "type" and "name" attributes) the path is resolved against the directory
 * returned by the component registrar; otherwise it is resolved against the base
 * directory. "word" children list the words permitted under that path and
 * "exclude" children list strings to strip from file contents before searching.
 *
 * Several items may name the same path: their words are merged, in the same way
 * their excludes are, instead of the last item silently replacing earlier ones.
 *
 * @param \SimpleXMLElement $configXml Parsed configuration
 * @return $this
 * @throws \Magento\TestFramework\Inspection\Exception When an item has no "path" node
 */
protected function _extractWhitelist(\SimpleXMLElement $configXml)
{
    // Load whitelist entries
    $whitelist = [];
    $exclude = [];
    $nodes = $configXml->xpath('//config/whitelist/item');
    foreach ($nodes as $node) {
        $path = $node->xpath('path');
        if (!$path) {
            throw new \Magento\TestFramework\Inspection\Exception(
                'A "path" must be defined for the whitelisted item'
            );
        }
        $component = $node->xpath('component');
        if ($component) {
            $componentType = $component[0]->xpath('@type')[0];
            $componentName = $component[0]->xpath('@name')[0];
            $path = $this->componentRegistrar->getPath((string)$componentType, (string)$componentName)
                . '/' . (string)$path[0];
        } else {
            $path = $this->_baseDir . '/' . (string)$path[0];
        }

        // Words permitted under this path
        $words = [];
        $wordNodes = $node->xpath('word');
        if ($wordNodes) {
            foreach ($wordNodes as $wordNode) {
                $words[] = (string)$wordNode;
            }
        }

        // Strings to eliminate from the contents of this path
        $excludes = [];
        $excludeNodes = $node->xpath('exclude');
        if ($excludeNodes) {
            foreach ($excludeNodes as $excludeNode) {
                $excludes[] = (string)$excludeNode;
            }
        }

        // Accumulate per path so that repeated items for one path add up.
        $whitelist[$path] = isset($whitelist[$path])
            ? array_merge($whitelist[$path], $words)
            : $words;
        $exclude[$path] = isset($exclude[$path])
            ? array_merge($excludes, $exclude[$path])
            : $excludes;
    }

    // Merge with already present whitelist
    foreach ($whitelist as $newPath => $newWords) {
        if (isset($this->_whitelist[$newPath])) {
            $newWords = array_merge($this->_whitelist[$newPath], $newWords);
        }
        $this->_whitelist[$newPath] = array_unique($newWords);
    }

    // Merge with already present excludes
    foreach ($exclude as $newPath => $newExcludes) {
        if (isset($this->exclude[$newPath])) {
            $newExcludes = array_merge($this->exclude[$newPath], $newExcludes);
        }
        $this->exclude[$newPath] = array_unique($newExcludes);
    }

    return $this;
}
249 
/**
 * Rewrite every whitelist key so that it uses forward slashes only.
 *
 * When two keys become identical after normalization, the later entry wins.
 *
 * @return void
 */
protected function _normalizeWhitelistPaths()
{
    $normalized = [];
    foreach ($this->_whitelist as $rawPath => $allowedWords) {
        $normalized[str_replace('\\', '/', $rawPath)] = $allowedWords;
    }
    $this->_whitelist = $normalized;
}
262 
/**
 * Return the configured words found in a file's path or contents, minus the
 * words that the whitelist permits for that file.
 *
 * @param string $file Path of the file to inspect
 * @return array Words found and not whitelisted; empty when nothing was found
 */
public function findWords($file)
{
    $foundWords = $this->_findWords($file);
    if (!$foundWords) {
        return [];
    }

    // Call through $this rather than self:: so that a subclass overriding the
    // protected _removeWhitelistedWords() is honoured.
    return $this->_removeWhitelistedWords($file, $foundWords);
}
279 
/**
 * Search a file's path and contents for the configured words, without applying
 * the whitelist.
 *
 * Contents of binary files (see _isBinaryFile()) are not read; only their path is
 * searched. Strings registered as excludes for the file are removed from the
 * contents before searching. When the copyright check is enabled and not skipped
 * for the file, the entry 'Copyright string is missing' is added unless the
 * copyright line is present and the license line starts no more than 10 bytes
 * after the end of it.
 *
 * @param string $file Path of the file to inspect
 * @return array Found words, possibly including the copyright marker entry
 */
protected function _findWords($file)
{
    $path = $this->getSearchablePath($file);

    $contents = '';
    if (!$this->_isBinaryFile($file)) {
        $contents = file_get_contents($file);
        if ($contents === false) {
            // Unreadable file: fall back to a path-only search.
            $contents = '';
        }
    }

    if (!empty($this->exclude[$file])) {
        foreach ($this->exclude[$file] as $stringToEliminate) {
            $contents = str_replace($stringToEliminate, "", $contents);
        }
    }

    $foundWords = [];
    foreach ($this->_words as $word) {
        if (stripos($path, $word) !== false || stripos($contents, $word) !== false) {
            $foundWords[] = $word;
        }
    }

    if ($contents && $this->isCopyrightChecked && !$this->isCopyrightCheckSkipped($file)) {
        // Both offsets and the length are taken in bytes. Mixing character offsets
        // (mb_*) with byte offsets made the measured gap grow with every multibyte
        // character in or before the copyright line, producing false reports.
        $copyrightPosition = strpos($contents, $this->copyrightString);
        $copyingPosition = strpos($contents, $this->copyingString);
        if ($copyrightPosition === false
            || $copyingPosition === false
            || $copyingPosition - $copyrightPosition - strlen($this->copyrightString) > 10
        ) {
            $foundWords[] = 'Copyright string is missing';
        }
    }

    return $foundWords;
}
314 
/**
 * Tell whether the copyright check does not apply to the given path.
 *
 * A path is skipped when its extension is listed in $copyrightSkipExtensions or
 * when it contains any fragment listed in $copyrightSkipList.
 *
 * @param string $path Path of the file being inspected
 * @return bool
 */
protected function isCopyrightCheckSkipped($path)
{
    $extension = pathinfo($path, PATHINFO_EXTENSION);
    $isSkipped = in_array($extension, $this->copyrightSkipExtensions);

    if (!$isSkipped) {
        foreach ($this->copyrightSkipList as $fragment) {
            if (strpos($path, $fragment) !== false) {
                $isSkipped = true;
                break;
            }
        }
    }

    return $isSkipped;
}
331 
/**
 * Tell whether a file is considered binary, judging by its extension only.
 *
 * The comparison against $_binaryExtensions is case-sensitive.
 *
 * @param string $file Path of the file
 * @return bool
 */
protected function _isBinaryFile($file)
{
    $extension = pathinfo($file, PATHINFO_EXTENSION);

    return in_array($extension, $this->_binaryExtensions);
}
342 
/**
 * Drop from the found words those that the whitelist permits for the given path.
 *
 * A whitelist entry applies when its path is a string prefix of the (slash-normalized)
 * file path. An applicable entry with an empty word list permits every word, so
 * nothing is reported for that file.
 *
 * @param string $path Path of the inspected file
 * @param array $foundWords Words found in that file
 * @return array Remaining words; keys of the input array are preserved
 */
protected function _removeWhitelistedWords($path, $foundWords)
{
    $normalizedPath = str_replace('\\', '/', $path);
    $remaining = $foundWords;

    foreach ($this->_whitelist as $prefix => $allowedWords) {
        $isUnderPrefix = strncmp($prefix, $normalizedPath, strlen($prefix)) == 0;
        if ($isUnderPrefix) {
            if (!$allowedWords) {
                // All words are permitted there
                return [];
            }
            $remaining = array_diff($remaining, $allowedWords);
        }
    }

    return $remaining;
}
366 
/**
 * Return the part of a file path in which words are searched.
 *
 * For a file located under the base directory this is the path relative to the
 * base directory, so that words occurring in the base directory's own name are
 * not reported. Any other path is returned unchanged.
 *
 * @param string $file Path of the file
 * @return string
 */
protected function getSearchablePath($file)
{
    if ($file === $this->_baseDir) {
        return '';
    }

    // Require a real directory prefix: the base dir followed by a separator at the very
    // start of the path. Merely containing the base dir somewhere, or sharing a name
    // prefix with it (/a/b vs /a/bc), must not cause the start of the path to be cut off.
    $prefix = rtrim($this->_baseDir, '/') . '/';
    if (strncmp($file, $prefix, strlen($prefix)) !== 0) {
        return $file;
    }

    return substr($file, strlen($prefix));
}
380 }
$contents
Definition: website.php:14
$baseDir
Definition: autoload.php:9
__construct($configFiles, $baseDir, $componentRegistrar, $isCopyrightChecked=false)
_extractWhitelist(\SimpleXMLElement $configXml)
_extractWords(\SimpleXMLElement $configXml)